From fb296b6c28afb704e4346c9c61fb731e79b835b5 Mon Sep 17 00:00:00 2001
From: Yiheng Wang <vennw@nvidia.com>
Date: Sat, 7 May 2022 14:49:22 +0800
Subject: [PATCH 01/12] reproduce issue

Signed-off-by: Yiheng Wang <vennw@nvidia.com>
---
 .github/workflows/cron.yml | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml
index 08065147e5..7e755dbe47 100644
--- a/.github/workflows/cron.yml
+++ b/.github/workflows/cron.yml
@@ -5,6 +5,9 @@ on:
   #   - cron: "0 2 * * *"  # at 02:00 UTC
   # Allows you to run this workflow manually from the Actions tab
   workflow_dispatch:
+  push:
+    branches:
+      - 4234-fix-2204-nvfuser-issue
 
 jobs:
   cron-gpu:
@@ -103,7 +106,6 @@ jobs:
 
   cron-pip:
     # pip install monai[all] and use it to run unit tests
-    if: github.repository == 'Project-MONAI/MONAI'
     strategy:
       matrix:
         container: ["pytorch:21.02", "pytorch:21.10", "pytorch:22.04"]  # 21.02, 21.10 for backward comp.
@@ -167,7 +169,7 @@ jobs:
           python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))"
 
           python -m pip install -r requirements-dev.txt
-          PYTHONPATH="$tmp_dir":$PYTHONPATH BUILD_MONAI=1 python ./tests/runner.py -p 'test_((?!integration).)'  # unit tests
+          PYTHONPATH="$tmp_dir":$PYTHONPATH BUILD_MONAI=1 python ./tests/test_dynunet.py  # unit tests
           if pgrep python; then pkill python; fi
 
   cron-docker:

From ce28bd8ad397ae98618675c6a3c5fab541c40675 Mon Sep 17 00:00:00 2001
From: Yiheng Wang <vennw@nvidia.com>
Date: Sat, 7 May 2022 14:52:21 +0800
Subject: [PATCH 02/12] remove 21.02 and 21.10 from the cron-pip container matrix

Signed-off-by: Yiheng Wang <vennw@nvidia.com>
---
 .github/workflows/cron.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml
index 7e755dbe47..4823259597 100644
--- a/.github/workflows/cron.yml
+++ b/.github/workflows/cron.yml
@@ -108,7 +108,7 @@ jobs:
     # pip install monai[all] and use it to run unit tests
     strategy:
       matrix:
-        container: ["pytorch:21.02", "pytorch:21.10", "pytorch:22.04"]  # 21.02, 21.10 for backward comp.
+        container: ["pytorch:22.04"]  # 21.02, 21.10 for backward comp.
     container:
       image: nvcr.io/nvidia/${{ matrix.container }}-py3  # testing with the latest pytorch base image
       options: "--gpus all"

From d15d6c03b3fb106c9f3094a59402ec74e412bda6 Mon Sep 17 00:00:00 2001
From: Yiheng Wang <vennw@nvidia.com>
Date: Sat, 7 May 2022 15:05:38 +0800
Subject: [PATCH 03/12] comment out the other cron jobs, keep only cron-pip

Signed-off-by: Yiheng Wang <vennw@nvidia.com>
---
 .github/workflows/cron.yml | 319 +++++++++++++++++++------------------
 1 file changed, 160 insertions(+), 159 deletions(-)

diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml
index 4823259597..f6c27565b6 100644
--- a/.github/workflows/cron.yml
+++ b/.github/workflows/cron.yml
@@ -10,102 +10,103 @@ on:
       - 4234-fix-2204-nvfuser-issue
 
 jobs:
-  cron-gpu:
-    if: github.repository == 'Project-MONAI/MONAI'
-    container:
-      image: nvcr.io/nvidia/pytorch:21.06-py3  # CUDA 11.3
-      options: "--gpus all"
-    runs-on: [self-hosted, linux, x64, common]
-    strategy:
-      matrix:
-        pytorch-version: [1.7.1, 1.8.1, 1.9.1, 1.10.2, latest]
-    steps:
-    - uses: actions/checkout@v2
-    - name: Install the dependencies
-      run: |
-        which python
-        python -m pip install --upgrade pip wheel
-        python -m pip uninstall -y torch torchvision
-        if [ ${{ matrix.pytorch-version }} == "latest" ]; then
-          python -m pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu113
-        elif [ ${{ matrix.pytorch-version }} == "1.7.1" ]; then
-          python -m pip install torch==1.7.1 torchvision==0.8.2 --extra-index-url https://download.pytorch.org/whl/cu113
-        elif [ ${{ matrix.pytorch-version }} == "1.8.1" ]; then
-          python -m pip install torch==1.8.1 torchvision==0.9.1 --extra-index-url https://download.pytorch.org/whl/cu113
-        elif [ ${{ matrix.pytorch-version }} == "1.9.1" ]; then
-          python -m pip install torch==1.9.1 torchvision==0.10.1 --extra-index-url https://download.pytorch.org/whl/cu113
-        elif [ ${{ matrix.pytorch-version }} == "1.10.2" ]; then
-          python -m pip install torch==1.10.2 torchvision==0.11.3 --extra-index-url https://download.pytorch.org/whl/cu113
-        fi
-        python -m pip install -r requirements-dev.txt
-        python -m pip list
-    - name: Run tests report coverage
-      run: |
-        export LAUNCH_DELAY=$[ $RANDOM % 16 * 60 ]
-        echo "Sleep $LAUNCH_DELAY"
-        sleep $LAUNCH_DELAY
-        nvidia-smi
-        export CUDA_VISIBLE_DEVICES=$(python -m tests.utils)
-        echo $CUDA_VISIBLE_DEVICES
-        trap 'if pgrep python; then pkill python; fi;' ERR
-        python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null &
-        python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))"
-        python -c 'import torch; print(torch.rand(5, 3, device=torch.device("cuda:0")))'
-        BUILD_MONAI=1 ./runtests.sh --build --coverage --unittests --disttests  # unit tests with coverage report
-        BUILD_MONAI=1 ./runtests.sh --build --coverage --net  # integration tests with coverage report
-        coverage xml
-        if pgrep python; then pkill python; fi
-    - name: Upload coverage
-      uses: codecov/codecov-action@v1
-      with:
-        fail_ci_if_error: false
-        file: ./coverage.xml
+  # cron-gpu:
+  #   if: github.repository == 'Project-MONAI/MONAI'
+  #   container:
+  #     image: nvcr.io/nvidia/pytorch:21.06-py3  # CUDA 11.3
+  #     options: "--gpus all"
+  #   runs-on: [self-hosted, linux, x64, common]
+  #   strategy:
+  #     matrix:
+  #       pytorch-version: [1.7.1, 1.8.1, 1.9.1, 1.10.2, latest]
+  #   steps:
+  #   - uses: actions/checkout@v2
+  #   - name: Install the dependencies
+  #     run: |
+  #       which python
+  #       python -m pip install --upgrade pip wheel
+  #       python -m pip uninstall -y torch torchvision
+  #       if [ ${{ matrix.pytorch-version }} == "latest" ]; then
+  #         python -m pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu113
+  #       elif [ ${{ matrix.pytorch-version }} == "1.7.1" ]; then
+  #         python -m pip install torch==1.7.1 torchvision==0.8.2 --extra-index-url https://download.pytorch.org/whl/cu113
+  #       elif [ ${{ matrix.pytorch-version }} == "1.8.1" ]; then
+  #         python -m pip install torch==1.8.1 torchvision==0.9.1 --extra-index-url https://download.pytorch.org/whl/cu113
+  #       elif [ ${{ matrix.pytorch-version }} == "1.9.1" ]; then
+  #         python -m pip install torch==1.9.1 torchvision==0.10.1 --extra-index-url https://download.pytorch.org/whl/cu113
+  #       elif [ ${{ matrix.pytorch-version }} == "1.10.2" ]; then
+  #         python -m pip install torch==1.10.2 torchvision==0.11.3 --extra-index-url https://download.pytorch.org/whl/cu113
+  #       fi
+  #       python -m pip install -r requirements-dev.txt
+  #       python -m pip list
+  #   - name: Run tests report coverage
+  #     run: |
+  #       export LAUNCH_DELAY=$[ $RANDOM % 16 * 60 ]
+  #       echo "Sleep $LAUNCH_DELAY"
+  #       sleep $LAUNCH_DELAY
+  #       nvidia-smi
+  #       export CUDA_VISIBLE_DEVICES=$(python -m tests.utils)
+  #       echo $CUDA_VISIBLE_DEVICES
+  #       trap 'if pgrep python; then pkill python; fi;' ERR
+  #       python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null &
+  #       python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))"
+  #       python -c 'import torch; print(torch.rand(5, 3, device=torch.device("cuda:0")))'
+  #       BUILD_MONAI=1 ./runtests.sh --build --coverage --unittests --disttests  # unit tests with coverage report
+  #       BUILD_MONAI=1 ./runtests.sh --build --coverage --net  # integration tests with coverage report
+  #       coverage xml
+  #       if pgrep python; then pkill python; fi
+  #   - name: Upload coverage
+  #     uses: codecov/codecov-action@v1
+  #     with:
+  #       fail_ci_if_error: false
+  #       file: ./coverage.xml
 
-  cron-pt-image:
-    if: github.repository == 'Project-MONAI/MONAI'
-    strategy:
-      matrix:
-        container: ["pytorch:21.02", "pytorch:21.10", "pytorch:22.04"]  # 21.02, 21.10 for backward comp.
-    container:
-      image: nvcr.io/nvidia/${{ matrix.container }}-py3  # testing with the latest pytorch base image
-      options: "--gpus all"
-    runs-on: [self-hosted, linux, x64, common]
-    steps:
-    - uses: actions/checkout@v2
-    - name: Install APT dependencies
-      run: |
-        apt-get update
-        DEBIAN_FRONTEND="noninteractive" apt-get install -y libopenslide0
-    - name: Install Python dependencies
-      run: |
-        which python
-        python -m pip install --upgrade pip wheel
-        python -m pip install -r requirements-dev.txt
-        python -m pip list
-    - name: Run tests report coverage
-      run: |
-        export LAUNCH_DELAY=$[ $RANDOM % 16 * 60 ]
-        echo "Sleep $LAUNCH_DELAY"
-        sleep $LAUNCH_DELAY
-        nvidia-smi
-        export CUDA_VISIBLE_DEVICES=$(python -m tests.utils)
-        echo $CUDA_VISIBLE_DEVICES
-        trap 'if pgrep python; then pkill python; fi;' ERR
-        python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null &
-        python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))"
-        python -c 'import torch; print(torch.rand(5, 3, device=torch.device("cuda:0")))'
-        BUILD_MONAI=1 ./runtests.sh --build --coverage --unittests --disttests  # unit tests with coverage report
-        BUILD_MONAI=1 ./runtests.sh --build --coverage --net  # integration tests with coverage report
-        coverage xml
-        if pgrep python; then pkill python; fi
-    - name: Upload coverage
-      uses: codecov/codecov-action@v1
-      with:
-        fail_ci_if_error: false
-        file: ./coverage.xml
+  # cron-pt-image:
+  #   if: github.repository == 'Project-MONAI/MONAI'
+  #   strategy:
+  #     matrix:
+  #       container: ["pytorch:21.02", "pytorch:21.10", "pytorch:22.04"]  # 21.02, 21.10 for backward comp.
+  #   container:
+  #     image: nvcr.io/nvidia/${{ matrix.container }}-py3  # testing with the latest pytorch base image
+  #     options: "--gpus all"
+  #   runs-on: [self-hosted, linux, x64, common]
+  #   steps:
+  #   - uses: actions/checkout@v2
+  #   - name: Install APT dependencies
+  #     run: |
+  #       apt-get update
+  #       DEBIAN_FRONTEND="noninteractive" apt-get install -y libopenslide0
+  #   - name: Install Python dependencies
+  #     run: |
+  #       which python
+  #       python -m pip install --upgrade pip wheel
+  #       python -m pip install -r requirements-dev.txt
+  #       python -m pip list
+  #   - name: Run tests report coverage
+  #     run: |
+  #       export LAUNCH_DELAY=$[ $RANDOM % 16 * 60 ]
+  #       echo "Sleep $LAUNCH_DELAY"
+  #       sleep $LAUNCH_DELAY
+  #       nvidia-smi
+  #       export CUDA_VISIBLE_DEVICES=$(python -m tests.utils)
+  #       echo $CUDA_VISIBLE_DEVICES
+  #       trap 'if pgrep python; then pkill python; fi;' ERR
+  #       python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null &
+  #       python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))"
+  #       python -c 'import torch; print(torch.rand(5, 3, device=torch.device("cuda:0")))'
+  #       BUILD_MONAI=1 ./runtests.sh --build --coverage --unittests --disttests  # unit tests with coverage report
+  #       BUILD_MONAI=1 ./runtests.sh --build --coverage --net  # integration tests with coverage report
+  #       coverage xml
+  #       if pgrep python; then pkill python; fi
+  #   - name: Upload coverage
+  #     uses: codecov/codecov-action@v1
+  #     with:
+  #       fail_ci_if_error: false
+  #       file: ./coverage.xml
 
   cron-pip:
     # pip install monai[all] and use it to run unit tests
+    if: github.repository == 'Project-MONAI/MONAI'
     strategy:
       matrix:
         container: ["pytorch:22.04"]  # 21.02, 21.10 for backward comp.
@@ -172,71 +173,71 @@ jobs:
           PYTHONPATH="$tmp_dir":$PYTHONPATH BUILD_MONAI=1 python ./tests/test_dynunet.py  # unit tests
           if pgrep python; then pkill python; fi
 
-  cron-docker:
-    if: github.repository == 'Project-MONAI/MONAI'
-    container:
-      image: docker://projectmonai/monai:latest  # this might be slow and has the pull count limitations
-      options: "--gpus all"
-    runs-on: [self-hosted, linux, x64, common]
-    steps:
-    - name: Run tests report coverage
-      # The docker image process has done the compilation.
-      # BUILD_MONAI=1 is necessary for triggering the USE_COMPILED flag.
-      run: |
-        cd /opt/monai
-        nvidia-smi
-        export CUDA_VISIBLE_DEVICES=$(python -m tests.utils)
-        echo $CUDA_VISIBLE_DEVICES
-        trap 'if pgrep python; then pkill python; fi;' ERR
-        python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null &
-        python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))"
-        python -c 'import torch; print(torch.rand(5,3, device=torch.device("cuda:0")))'
-        ngc --version
-        BUILD_MONAI=1 ./runtests.sh --build --coverage --pytype --unittests --disttests  # unit tests with pytype checks, coverage report
-        BUILD_MONAI=1 ./runtests.sh --build --coverage --net  # integration tests with coverage report
-        coverage xml
-        if pgrep python; then pkill python; fi
-    - name: Upload coverage
-      uses: codecov/codecov-action@v1
-      with:
-        fail_ci_if_error: false
-        file: ./coverage.xml
+  # cron-docker:
+  #   if: github.repository == 'Project-MONAI/MONAI'
+  #   container:
+  #     image: docker://projectmonai/monai:latest  # this might be slow and has the pull count limitations
+  #     options: "--gpus all"
+  #   runs-on: [self-hosted, linux, x64, common]
+  #   steps:
+  #   - name: Run tests report coverage
+  #     # The docker image process has done the compilation.
+  #     # BUILD_MONAI=1 is necessary for triggering the USE_COMPILED flag.
+  #     run: |
+  #       cd /opt/monai
+  #       nvidia-smi
+  #       export CUDA_VISIBLE_DEVICES=$(python -m tests.utils)
+  #       echo $CUDA_VISIBLE_DEVICES
+  #       trap 'if pgrep python; then pkill python; fi;' ERR
+  #       python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null &
+  #       python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))"
+  #       python -c 'import torch; print(torch.rand(5,3, device=torch.device("cuda:0")))'
+  #       ngc --version
+  #       BUILD_MONAI=1 ./runtests.sh --build --coverage --pytype --unittests --disttests  # unit tests with pytype checks, coverage report
+  #       BUILD_MONAI=1 ./runtests.sh --build --coverage --net  # integration tests with coverage report
+  #       coverage xml
+  #       if pgrep python; then pkill python; fi
+  #   - name: Upload coverage
+  #     uses: codecov/codecov-action@v1
+  #     with:
+  #       fail_ci_if_error: false
+  #       file: ./coverage.xml
 
-  cron-tutorial-notebooks:
-    if: github.repository == 'Project-MONAI/MONAI'
-    needs: cron-gpu  # so that monai itself is verified first
-    container:
-      image: nvcr.io/nvidia/pytorch:22.04-py3  # testing with the latest pytorch base image
-      options: "--gpus all --ipc=host"
-    runs-on: [self-hosted, linux, x64, common]
-    steps:
-    - uses: actions/checkout@v2
-    - name: Install MONAI
-      id: monai-install
-      run: |
-        which python
-        python -m pip install --upgrade pip wheel
-        python -m pip install -r requirements-dev.txt
-        BUILD_MONAI=1 python setup.py develop  # install monai
-        nvidia-smi
-        export CUDA_VISIBLE_DEVICES=$(python -m tests.utils)
-        echo $CUDA_VISIBLE_DEVICES
-        echo "::set-output name=devices::$CUDA_VISIBLE_DEVICES"
-    - name: Checkout tutorials and install their requirements
-      run: |
-        cd /opt
-        git clone --depth 1 --branch master --single-branch https://github.com/Project-MONAI/tutorials.git  # latest commit of master branch
-        cd tutorials
-        python -m pip install -r requirements.txt
-    - name: Run tutorial notebooks
-      timeout-minutes: 150
-      run: |
-        export CUDA_VISIBLE_DEVICES=${{ steps.monai-install.outputs.devices }}
-        echo $CUDA_VISIBLE_DEVICES
-        trap 'if pgrep python; then pkill python; fi;' ERR
-        python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null &
-        cd /opt/tutorials
-        python -c 'import monai; monai.config.print_debug_info()'
-        $(pwd)/runner.sh
-        python -c 'import monai; monai.config.print_debug_info()'
-        if pgrep python; then pkill python; fi
+  # cron-tutorial-notebooks:
+  #   if: github.repository == 'Project-MONAI/MONAI'
+  #   needs: cron-gpu  # so that monai itself is verified first
+  #   container:
+  #     image: nvcr.io/nvidia/pytorch:22.04-py3  # testing with the latest pytorch base image
+  #     options: "--gpus all --ipc=host"
+  #   runs-on: [self-hosted, linux, x64, common]
+  #   steps:
+  #   - uses: actions/checkout@v2
+  #   - name: Install MONAI
+  #     id: monai-install
+  #     run: |
+  #       which python
+  #       python -m pip install --upgrade pip wheel
+  #       python -m pip install -r requirements-dev.txt
+  #       BUILD_MONAI=1 python setup.py develop  # install monai
+  #       nvidia-smi
+  #       export CUDA_VISIBLE_DEVICES=$(python -m tests.utils)
+  #       echo $CUDA_VISIBLE_DEVICES
+  #       echo "::set-output name=devices::$CUDA_VISIBLE_DEVICES"
+  #   - name: Checkout tutorials and install their requirements
+  #     run: |
+  #       cd /opt
+  #       git clone --depth 1 --branch master --single-branch https://github.com/Project-MONAI/tutorials.git  # latest commit of master branch
+  #       cd tutorials
+  #       python -m pip install -r requirements.txt
+  #   - name: Run tutorial notebooks
+  #     timeout-minutes: 150
+  #     run: |
+  #       export CUDA_VISIBLE_DEVICES=${{ steps.monai-install.outputs.devices }}
+  #       echo $CUDA_VISIBLE_DEVICES
+  #       trap 'if pgrep python; then pkill python; fi;' ERR
+  #       python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null &
+  #       cd /opt/tutorials
+  #       python -c 'import monai; monai.config.print_debug_info()'
+  #       $(pwd)/runner.sh
+  #       python -c 'import monai; monai.config.print_debug_info()'
+  #       if pgrep python; then pkill python; fi

From 72e9a2f3c463c123b0d02ead3be641a29c380b3a Mon Sep 17 00:00:00 2001
From: Yiheng Wang <vennw@nvidia.com>
Date: Sat, 7 May 2022 15:39:18 +0800
Subject: [PATCH 04/12] run on pull request

Signed-off-by: Yiheng Wang <vennw@nvidia.com>
---
 .github/workflows/cron.yml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml
index f6c27565b6..409b1ac5f7 100644
--- a/.github/workflows/cron.yml
+++ b/.github/workflows/cron.yml
@@ -5,9 +5,7 @@ on:
   #   - cron: "0 2 * * *"  # at 02:00 UTC
   # Allows you to run this workflow manually from the Actions tab
   workflow_dispatch:
-  push:
-    branches:
-      - 4234-fix-2204-nvfuser-issue
+  pull_request:
 
 jobs:
   # cron-gpu:

From 4a04ab4e69ce492a223b7b7e6535c25884bfa6c0 Mon Sep 17 00:00:00 2001
From: Yiheng Wang <vennw@nvidia.com>
Date: Sat, 7 May 2022 15:50:11 +0800
Subject: [PATCH 05/12] remove sleep

Signed-off-by: Yiheng Wang <vennw@nvidia.com>
---
 .github/workflows/cron.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml
index 409b1ac5f7..8c7386b6d7 100644
--- a/.github/workflows/cron.yml
+++ b/.github/workflows/cron.yml
@@ -158,8 +158,8 @@ jobs:
           ls -al
 
           export LAUNCH_DELAY=$[ $RANDOM % 16 * 60 ]
-          echo "Sleep $LAUNCH_DELAY"
-          sleep $LAUNCH_DELAY
+          # echo "Sleep $LAUNCH_DELAY"
+          # sleep $LAUNCH_DELAY
           nvidia-smi
           export CUDA_VISIBLE_DEVICES=$(python -m tests.utils)
           echo $CUDA_VISIBLE_DEVICES

From 281a9eab71d725f4decb3ff01b36c6dad68f6ab5 Mon Sep 17 00:00:00 2001
From: Yiheng Wang <vennw@nvidia.com>
Date: Tue, 10 May 2022 22:29:20 +0800
Subject: [PATCH 06/12] test single layer forward

Signed-off-by: Yiheng Wang <vennw@nvidia.com>
---
 tests/test_dynunet.py | 43 +++++++++++++++++++++++--------------------
 tests/utils.py        | 22 ++++++++++++++++++++++
 2 files changed, 45 insertions(+), 20 deletions(-)

diff --git a/tests/test_dynunet.py b/tests/test_dynunet.py
index 14006b96e6..9ad8cf6217 100644
--- a/tests/test_dynunet.py
+++ b/tests/test_dynunet.py
@@ -20,7 +20,7 @@
 from monai.utils import optional_import
 from tests.utils import skip_if_no_cuda, skip_if_windows, test_script_save
 
-_, has_nvfuser = optional_import("apex.normalization", name="InstanceNorm3dNVFuser")
+InstanceNorm3dNVFuser, has_nvfuser = optional_import("apex.normalization", name="InstanceNorm3dNVFuser")
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
@@ -127,25 +127,28 @@ def test_script(self):
 class TestDynUNetWithInstanceNorm3dNVFuser(unittest.TestCase):
     @parameterized.expand([TEST_CASE_DYNUNET_3D[0]])
     def test_consistency(self, input_param, input_shape, _):
-        for eps in [1e-4, 1e-5]:
-            for momentum in [0.1, 0.01]:
-                for affine in [True, False]:
-                    norm_param = {"eps": eps, "momentum": momentum, "affine": affine}
-                    input_param["norm_name"] = ("instance", norm_param)
-                    input_param_fuser = input_param.copy()
-                    input_param_fuser["norm_name"] = ("instance_nvfuser", norm_param)
-                    for memory_format in [torch.contiguous_format, torch.channels_last_3d]:
-                        net = DynUNet(**input_param).to("cuda:0", memory_format=memory_format)
-                        net_fuser = DynUNet(**input_param_fuser).to("cuda:0", memory_format=memory_format)
-                        net_fuser.load_state_dict(net.state_dict())
-
-                        input_tensor = torch.randn(input_shape).to("cuda:0", memory_format=memory_format)
-                        with eval_mode(net):
-                            result = net(input_tensor)
-                        with eval_mode(net_fuser):
-                            result_fuser = net_fuser(input_tensor)
-
-                        torch.testing.assert_close(result, result_fuser)
+        layer = InstanceNorm3dNVFuser(num_features=1, affine=True).to("cuda:0")
+        inp = torch.randn([1, 1, 1, 1, 1]).to("cuda:0")
+        out = layer(inp)
+        # for eps in [1e-4, 1e-5]:
+        #     for momentum in [0.1, 0.01]:
+        #         for affine in [True, False]:
+        #             norm_param = {"eps": eps, "momentum": momentum, "affine": affine}
+        #             input_param["norm_name"] = ("instance", norm_param)
+        #             input_param_fuser = input_param.copy()
+        #             input_param_fuser["norm_name"] = ("instance_nvfuser", norm_param)
+        #             for memory_format in [torch.contiguous_format, torch.channels_last_3d]:
+        #                 net = DynUNet(**input_param).to("cuda:0", memory_format=memory_format)
+        #                 net_fuser = DynUNet(**input_param_fuser).to("cuda:0", memory_format=memory_format)
+        #                 net_fuser.load_state_dict(net.state_dict())
+
+        #                 input_tensor = torch.randn(input_shape).to("cuda:0", memory_format=memory_format)
+        #                 with eval_mode(net):
+        #                     result = net(input_tensor)
+        #                 with eval_mode(net_fuser):
+        #                     result_fuser = net_fuser(input_tensor)
+
+        #                 torch.testing.assert_close(result, result_fuser)
 
 
 class TestDynUNetDeepSupervision(unittest.TestCase):
diff --git a/tests/utils.py b/tests/utils.py
index 1a547fc2d2..f0a3f7d716 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -278,6 +278,28 @@ def has_cupy():
 HAS_CUPY = has_cupy()
 
 
+# def has_nvfuser():
+#     """
+#     Returns True if the user has installed a proper version of apex that contains `normalization.InstanceNorm3dNVFuser`.
+#     """
+#     InstanceNorm3dNVFuser, has_nvfuser = optional_import("apex.normalization", name="InstanceNorm3dNVFuser")
+#     if not has_nvfuser:
+#         return False
+#     if not torch.cuda.is_available():
+#         return False
+#     try:  # test nvfuser installation with a basic example
+#         layer = InstanceNorm3dNVFuser(num_features=1, affine=True).to("cuda:0")
+#         inp = torch.randn([1, 1, 1, 1, 1]).to("cuda:0")
+#         out = layer(inp)
+#         del inp, out
+#         return True
+#     except Exception:
+#         return False
+
+
+# HAS_NVFUSER = has_nvfuser()
+
+
 def make_nifti_image(array: NdarrayOrTensor, affine=None, dir=None, fname=None, suffix=".nii.gz", verbose=False):
     """
     Create a temporary nifti image on the disk and return the image name.

From 7db3530c70d414ea0a3a32dfa65e47b4e6ee3a8b Mon Sep 17 00:00:00 2001
From: Yiheng Wang <vennw@nvidia.com>
Date: Tue, 10 May 2022 22:51:50 +0800
Subject: [PATCH 07/12] add has_nvfuser

Signed-off-by: Yiheng Wang <vennw@nvidia.com>
---
 tests/test_dynunet.py | 48 +++++++++++++++++++------------------------
 tests/utils.py        | 40 ++++++++++++++++++------------------
 2 files changed, 41 insertions(+), 47 deletions(-)

diff --git a/tests/test_dynunet.py b/tests/test_dynunet.py
index 9ad8cf6217..d06892602f 100644
--- a/tests/test_dynunet.py
+++ b/tests/test_dynunet.py
@@ -17,10 +17,7 @@
 
 from monai.networks import eval_mode
 from monai.networks.nets import DynUNet
-from monai.utils import optional_import
-from tests.utils import skip_if_no_cuda, skip_if_windows, test_script_save
-
-InstanceNorm3dNVFuser, has_nvfuser = optional_import("apex.normalization", name="InstanceNorm3dNVFuser")
+from tests.utils import HAS_NVFUSER, skip_if_no_cuda, skip_if_windows, test_script_save
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
@@ -123,32 +120,29 @@ def test_script(self):
 
 @skip_if_no_cuda
 @skip_if_windows
-@unittest.skipUnless(has_nvfuser, "To use `instance_nvfuser`, `apex.normalization.InstanceNorm3dNVFuser` is needed.")
+@unittest.skipUnless(HAS_NVFUSER, "To use `instance_nvfuser`, `apex.normalization.InstanceNorm3dNVFuser` is needed.")
 class TestDynUNetWithInstanceNorm3dNVFuser(unittest.TestCase):
     @parameterized.expand([TEST_CASE_DYNUNET_3D[0]])
     def test_consistency(self, input_param, input_shape, _):
-        layer = InstanceNorm3dNVFuser(num_features=1, affine=True).to("cuda:0")
-        inp = torch.randn([1, 1, 1, 1, 1]).to("cuda:0")
-        out = layer(inp)
-        # for eps in [1e-4, 1e-5]:
-        #     for momentum in [0.1, 0.01]:
-        #         for affine in [True, False]:
-        #             norm_param = {"eps": eps, "momentum": momentum, "affine": affine}
-        #             input_param["norm_name"] = ("instance", norm_param)
-        #             input_param_fuser = input_param.copy()
-        #             input_param_fuser["norm_name"] = ("instance_nvfuser", norm_param)
-        #             for memory_format in [torch.contiguous_format, torch.channels_last_3d]:
-        #                 net = DynUNet(**input_param).to("cuda:0", memory_format=memory_format)
-        #                 net_fuser = DynUNet(**input_param_fuser).to("cuda:0", memory_format=memory_format)
-        #                 net_fuser.load_state_dict(net.state_dict())
-
-        #                 input_tensor = torch.randn(input_shape).to("cuda:0", memory_format=memory_format)
-        #                 with eval_mode(net):
-        #                     result = net(input_tensor)
-        #                 with eval_mode(net_fuser):
-        #                     result_fuser = net_fuser(input_tensor)
-
-        #                 torch.testing.assert_close(result, result_fuser)
+        for eps in [1e-4, 1e-5]:
+            for momentum in [0.1, 0.01]:
+                for affine in [True, False]:
+                    norm_param = {"eps": eps, "momentum": momentum, "affine": affine}
+                    input_param["norm_name"] = ("instance", norm_param)
+                    input_param_fuser = input_param.copy()
+                    input_param_fuser["norm_name"] = ("instance_nvfuser", norm_param)
+                    for memory_format in [torch.contiguous_format, torch.channels_last_3d]:
+                        net = DynUNet(**input_param).to("cuda:0", memory_format=memory_format)
+                        net_fuser = DynUNet(**input_param_fuser).to("cuda:0", memory_format=memory_format)
+                        net_fuser.load_state_dict(net.state_dict())
+
+                        input_tensor = torch.randn(input_shape).to("cuda:0", memory_format=memory_format)
+                        with eval_mode(net):
+                            result = net(input_tensor)
+                        with eval_mode(net_fuser):
+                            result_fuser = net_fuser(input_tensor)
+
+                        torch.testing.assert_close(result, result_fuser)
 
 
 class TestDynUNetDeepSupervision(unittest.TestCase):
diff --git a/tests/utils.py b/tests/utils.py
index f0a3f7d716..6e5fa77c02 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -278,26 +278,26 @@ def has_cupy():
 HAS_CUPY = has_cupy()
 
 
-# def has_nvfuser():
-#     """
-#     Returns True if the user has installed a proper version of apex that contains `normalization.InstanceNorm3dNVFuser`.
-#     """
-#     InstanceNorm3dNVFuser, has_nvfuser = optional_import("apex.normalization", name="InstanceNorm3dNVFuser")
-#     if not has_nvfuser:
-#         return False
-#     if not torch.cuda.is_available():
-#         return False
-#     try:  # test nvfuser installation with a basic example
-#         layer = InstanceNorm3dNVFuser(num_features=1, affine=True).to("cuda:0")
-#         inp = torch.randn([1, 1, 1, 1, 1]).to("cuda:0")
-#         out = layer(inp)
-#         del inp, out
-#         return True
-#     except Exception:
-#         return False
-
-
-# HAS_NVFUSER = has_nvfuser()
+def has_nvfuser():
+    """
+    Returns True if the user has installed a proper version of apex that contains `normalization.InstanceNorm3dNVFuser`.
+    """
+    instancenorm_3dnvfuser, has_nvfuser = optional_import("apex.normalization", name="InstanceNorm3dNVFuser")
+    if not has_nvfuser:
+        return False
+    if not torch.cuda.is_available():
+        return False
+    try:  # test nvfuser installation with a basic example
+        layer = instancenorm_3dnvfuser(num_features=1, affine=True).to("cuda:0")
+        inp = torch.randn([1, 1, 1, 1, 1]).to("cuda:0")
+        out = layer(inp)
+        del inp, out
+        return True
+    except Exception:
+        return False
+
+
+HAS_NVFUSER = has_nvfuser()
 
 
 def make_nifti_image(array: NdarrayOrTensor, affine=None, dir=None, fname=None, suffix=".nii.gz", verbose=False):

From 4e84f691d7b65a857f7c8eab9162f0585b6061fa Mon Sep 17 00:00:00 2001
From: Yiheng Wang <vennw@nvidia.com>
Date: Tue, 10 May 2022 23:10:25 +0800
Subject: [PATCH 08/12] add check within factory

Signed-off-by: Yiheng Wang <vennw@nvidia.com>
---
 monai/networks/layers/factories.py | 19 +++++++++++++++++--
 tests/test_dynunet.py              |  3 +--
 tests/utils.py                     | 22 ----------------------
 3 files changed, 18 insertions(+), 26 deletions(-)

diff --git a/monai/networks/layers/factories.py b/monai/networks/layers/factories.py
index b808c24de0..58a04b122b 100644
--- a/monai/networks/layers/factories.py
+++ b/monai/networks/layers/factories.py
@@ -63,12 +63,14 @@ def use_factory(fact_args):
 import warnings
 from typing import Any, Callable, Dict, Tuple, Type, Union
 
+import torch
 import torch.nn as nn
 
 from monai.utils import look_up_option, optional_import
 
 InstanceNorm3dNVFuser, has_nvfuser = optional_import("apex.normalization", name="InstanceNorm3dNVFuser")
 
+
 __all__ = ["LayerFactory", "Dropout", "Norm", "Act", "Conv", "Pool", "Pad", "split_args"]
 
 
@@ -263,8 +265,21 @@ def instance_nvfuser_factory(dim):
     if dim != 3:
         warnings.warn(f"`InstanceNorm3dNVFuser` only supports 3d cases, use {types[dim - 1]} instead.")
         return types[dim - 1]
-    if not has_nvfuser:
-        warnings.warn("`apex.normalization.InstanceNorm3dNVFuser` is not found, use nn.InstanceNorm3d instead.")
+    # test InstanceNorm3dNVFuser installation with a basic example
+    has_nvfuser_flag = has_nvfuser
+    if not torch.cuda.is_available():
+        has_nvfuser_flag = False
+    try:
+        layer = InstanceNorm3dNVFuser(num_features=1, affine=True).to("cuda:0")
+        inp = torch.randn([1, 1, 1, 1, 1]).to("cuda:0")
+        out = layer(inp)
+        del inp, out, layer
+    except Exception:
+        has_nvfuser_flag = False
+    if not has_nvfuser_flag:
+        warnings.warn(
+            "`apex.normalization.InstanceNorm3dNVFuser` is not installed properly, use nn.InstanceNorm3d instead."
+        )
         return nn.InstanceNorm3d
     return InstanceNorm3dNVFuser
 
diff --git a/tests/test_dynunet.py b/tests/test_dynunet.py
index d06892602f..a35ee36c95 100644
--- a/tests/test_dynunet.py
+++ b/tests/test_dynunet.py
@@ -17,7 +17,7 @@
 
 from monai.networks import eval_mode
 from monai.networks.nets import DynUNet
-from tests.utils import HAS_NVFUSER, skip_if_no_cuda, skip_if_windows, test_script_save
+from tests.utils import skip_if_no_cuda, skip_if_windows, test_script_save
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
@@ -120,7 +120,6 @@ def test_script(self):
 
 @skip_if_no_cuda
 @skip_if_windows
-@unittest.skipUnless(HAS_NVFUSER, "To use `instance_nvfuser`, `apex.normalization.InstanceNorm3dNVFuser` is needed.")
 class TestDynUNetWithInstanceNorm3dNVFuser(unittest.TestCase):
     @parameterized.expand([TEST_CASE_DYNUNET_3D[0]])
     def test_consistency(self, input_param, input_shape, _):
diff --git a/tests/utils.py b/tests/utils.py
index 6e5fa77c02..1a547fc2d2 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -278,28 +278,6 @@ def has_cupy():
 HAS_CUPY = has_cupy()
 
 
-def has_nvfuser():
-    """
-    Returns True if the user has installed a proper version of apex that contains `normalization.InstanceNorm3dNVFuser`.
-    """
-    instancenorm_3dnvfuser, has_nvfuser = optional_import("apex.normalization", name="InstanceNorm3dNVFuser")
-    if not has_nvfuser:
-        return False
-    if not torch.cuda.is_available():
-        return False
-    try:  # test nvfuser installation with a basic example
-        layer = instancenorm_3dnvfuser(num_features=1, affine=True).to("cuda:0")
-        inp = torch.randn([1, 1, 1, 1, 1]).to("cuda:0")
-        out = layer(inp)
-        del inp, out
-        return True
-    except Exception:
-        return False
-
-
-HAS_NVFUSER = has_nvfuser()
-
-
 def make_nifti_image(array: NdarrayOrTensor, affine=None, dir=None, fname=None, suffix=".nii.gz", verbose=False):
     """
     Create a temporary nifti image on the disk and return the image name.

From 0a5be1a7fcd7ec709ec87c453519f4f5501f0850 Mon Sep 17 00:00:00 2001
From: Yiheng Wang <vennw@nvidia.com>
Date: Wed, 11 May 2022 11:46:49 +0800
Subject: [PATCH 09/12] revert to original cron.yml

Signed-off-by: Yiheng Wang <vennw@nvidia.com>
---
 .github/workflows/cron.yml | 327 ++++++++++++++++++-------------------
 1 file changed, 163 insertions(+), 164 deletions(-)

diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml
index 8c7386b6d7..4ef8652e97 100644
--- a/.github/workflows/cron.yml
+++ b/.github/workflows/cron.yml
@@ -5,109 +5,108 @@ on:
   #   - cron: "0 2 * * *"  # at 02:00 UTC
   # Allows you to run this workflow manually from the Actions tab
   workflow_dispatch:
-  pull_request:
 
 jobs:
-  # cron-gpu:
-  #   if: github.repository == 'Project-MONAI/MONAI'
-  #   container:
-  #     image: nvcr.io/nvidia/pytorch:21.06-py3  # CUDA 11.3
-  #     options: "--gpus all"
-  #   runs-on: [self-hosted, linux, x64, common]
-  #   strategy:
-  #     matrix:
-  #       pytorch-version: [1.7.1, 1.8.1, 1.9.1, 1.10.2, latest]
-  #   steps:
-  #   - uses: actions/checkout@v2
-  #   - name: Install the dependencies
-  #     run: |
-  #       which python
-  #       python -m pip install --upgrade pip wheel
-  #       python -m pip uninstall -y torch torchvision
-  #       if [ ${{ matrix.pytorch-version }} == "latest" ]; then
-  #         python -m pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu113
-  #       elif [ ${{ matrix.pytorch-version }} == "1.7.1" ]; then
-  #         python -m pip install torch==1.7.1 torchvision==0.8.2 --extra-index-url https://download.pytorch.org/whl/cu113
-  #       elif [ ${{ matrix.pytorch-version }} == "1.8.1" ]; then
-  #         python -m pip install torch==1.8.1 torchvision==0.9.1 --extra-index-url https://download.pytorch.org/whl/cu113
-  #       elif [ ${{ matrix.pytorch-version }} == "1.9.1" ]; then
-  #         python -m pip install torch==1.9.1 torchvision==0.10.1 --extra-index-url https://download.pytorch.org/whl/cu113
-  #       elif [ ${{ matrix.pytorch-version }} == "1.10.2" ]; then
-  #         python -m pip install torch==1.10.2 torchvision==0.11.3 --extra-index-url https://download.pytorch.org/whl/cu113
-  #       fi
-  #       python -m pip install -r requirements-dev.txt
-  #       python -m pip list
-  #   - name: Run tests report coverage
-  #     run: |
-  #       export LAUNCH_DELAY=$[ $RANDOM % 16 * 60 ]
-  #       echo "Sleep $LAUNCH_DELAY"
-  #       sleep $LAUNCH_DELAY
-  #       nvidia-smi
-  #       export CUDA_VISIBLE_DEVICES=$(python -m tests.utils)
-  #       echo $CUDA_VISIBLE_DEVICES
-  #       trap 'if pgrep python; then pkill python; fi;' ERR
-  #       python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null &
-  #       python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))"
-  #       python -c 'import torch; print(torch.rand(5, 3, device=torch.device("cuda:0")))'
-  #       BUILD_MONAI=1 ./runtests.sh --build --coverage --unittests --disttests  # unit tests with coverage report
-  #       BUILD_MONAI=1 ./runtests.sh --build --coverage --net  # integration tests with coverage report
-  #       coverage xml
-  #       if pgrep python; then pkill python; fi
-  #   - name: Upload coverage
-  #     uses: codecov/codecov-action@v1
-  #     with:
-  #       fail_ci_if_error: false
-  #       file: ./coverage.xml
+  cron-gpu:
+    if: github.repository == 'Project-MONAI/MONAI'
+    container:
+      image: nvcr.io/nvidia/pytorch:21.06-py3  # CUDA 11.3
+      options: "--gpus all"
+    runs-on: [self-hosted, linux, x64, common]
+    strategy:
+      matrix:
+        pytorch-version: [1.7.1, 1.8.1, 1.9.1, 1.10.2, latest]
+    steps:
+    - uses: actions/checkout@v2
+    - name: Install the dependencies
+      run: |
+        which python
+        python -m pip install --upgrade pip wheel
+        python -m pip uninstall -y torch torchvision
+        if [ ${{ matrix.pytorch-version }} == "latest" ]; then
+          python -m pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu113
+        elif [ ${{ matrix.pytorch-version }} == "1.7.1" ]; then
+          python -m pip install torch==1.7.1 torchvision==0.8.2 --extra-index-url https://download.pytorch.org/whl/cu113
+        elif [ ${{ matrix.pytorch-version }} == "1.8.1" ]; then
+          python -m pip install torch==1.8.1 torchvision==0.9.1 --extra-index-url https://download.pytorch.org/whl/cu113
+        elif [ ${{ matrix.pytorch-version }} == "1.9.1" ]; then
+          python -m pip install torch==1.9.1 torchvision==0.10.1 --extra-index-url https://download.pytorch.org/whl/cu113
+        elif [ ${{ matrix.pytorch-version }} == "1.10.2" ]; then
+          python -m pip install torch==1.10.2 torchvision==0.11.3 --extra-index-url https://download.pytorch.org/whl/cu113
+        fi
+        python -m pip install -r requirements-dev.txt
+        python -m pip list
+    - name: Run tests report coverage
+      run: |
+        export LAUNCH_DELAY=$[ $RANDOM % 16 * 60 ]
+        echo "Sleep $LAUNCH_DELAY"
+        sleep $LAUNCH_DELAY
+        nvidia-smi
+        export CUDA_VISIBLE_DEVICES=$(python -m tests.utils)
+        echo $CUDA_VISIBLE_DEVICES
+        trap 'if pgrep python; then pkill python; fi;' ERR
+        python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null &
+        python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))"
+        python -c 'import torch; print(torch.rand(5, 3, device=torch.device("cuda:0")))'
+        BUILD_MONAI=1 ./runtests.sh --build --coverage --unittests --disttests  # unit tests with coverage report
+        BUILD_MONAI=1 ./runtests.sh --build --coverage --net  # integration tests with coverage report
+        coverage xml
+        if pgrep python; then pkill python; fi
+    - name: Upload coverage
+      uses: codecov/codecov-action@v1
+      with:
+        fail_ci_if_error: false
+        file: ./coverage.xml
 
-  # cron-pt-image:
-  #   if: github.repository == 'Project-MONAI/MONAI'
-  #   strategy:
-  #     matrix:
-  #       container: ["pytorch:21.02", "pytorch:21.10", "pytorch:22.04"]  # 21.02, 21.10 for backward comp.
-  #   container:
-  #     image: nvcr.io/nvidia/${{ matrix.container }}-py3  # testing with the latest pytorch base image
-  #     options: "--gpus all"
-  #   runs-on: [self-hosted, linux, x64, common]
-  #   steps:
-  #   - uses: actions/checkout@v2
-  #   - name: Install APT dependencies
-  #     run: |
-  #       apt-get update
-  #       DEBIAN_FRONTEND="noninteractive" apt-get install -y libopenslide0
-  #   - name: Install Python dependencies
-  #     run: |
-  #       which python
-  #       python -m pip install --upgrade pip wheel
-  #       python -m pip install -r requirements-dev.txt
-  #       python -m pip list
-  #   - name: Run tests report coverage
-  #     run: |
-  #       export LAUNCH_DELAY=$[ $RANDOM % 16 * 60 ]
-  #       echo "Sleep $LAUNCH_DELAY"
-  #       sleep $LAUNCH_DELAY
-  #       nvidia-smi
-  #       export CUDA_VISIBLE_DEVICES=$(python -m tests.utils)
-  #       echo $CUDA_VISIBLE_DEVICES
-  #       trap 'if pgrep python; then pkill python; fi;' ERR
-  #       python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null &
-  #       python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))"
-  #       python -c 'import torch; print(torch.rand(5, 3, device=torch.device("cuda:0")))'
-  #       BUILD_MONAI=1 ./runtests.sh --build --coverage --unittests --disttests  # unit tests with coverage report
-  #       BUILD_MONAI=1 ./runtests.sh --build --coverage --net  # integration tests with coverage report
-  #       coverage xml
-  #       if pgrep python; then pkill python; fi
-  #   - name: Upload coverage
-  #     uses: codecov/codecov-action@v1
-  #     with:
-  #       fail_ci_if_error: false
-  #       file: ./coverage.xml
+  cron-pt-image:
+    if: github.repository == 'Project-MONAI/MONAI'
+    strategy:
+      matrix:
+        container: ["pytorch:21.02", "pytorch:21.10", "pytorch:22.04"]  # 21.02, 21.10 for backward comp.
+    container:
+      image: nvcr.io/nvidia/${{ matrix.container }}-py3  # testing with the latest pytorch base image
+      options: "--gpus all"
+    runs-on: [self-hosted, linux, x64, common]
+    steps:
+    - uses: actions/checkout@v2
+    - name: Install APT dependencies
+      run: |
+        apt-get update
+        DEBIAN_FRONTEND="noninteractive" apt-get install -y libopenslide0
+    - name: Install Python dependencies
+      run: |
+        which python
+        python -m pip install --upgrade pip wheel
+        python -m pip install -r requirements-dev.txt
+        python -m pip list
+    - name: Run tests report coverage
+      run: |
+        export LAUNCH_DELAY=$[ $RANDOM % 16 * 60 ]
+        echo "Sleep $LAUNCH_DELAY"
+        sleep $LAUNCH_DELAY
+        nvidia-smi
+        export CUDA_VISIBLE_DEVICES=$(python -m tests.utils)
+        echo $CUDA_VISIBLE_DEVICES
+        trap 'if pgrep python; then pkill python; fi;' ERR
+        python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null &
+        python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))"
+        python -c 'import torch; print(torch.rand(5, 3, device=torch.device("cuda:0")))'
+        BUILD_MONAI=1 ./runtests.sh --build --coverage --unittests --disttests  # unit tests with coverage report
+        BUILD_MONAI=1 ./runtests.sh --build --coverage --net  # integration tests with coverage report
+        coverage xml
+        if pgrep python; then pkill python; fi
+    - name: Upload coverage
+      uses: codecov/codecov-action@v1
+      with:
+        fail_ci_if_error: false
+        file: ./coverage.xml
 
   cron-pip:
     # pip install monai[all] and use it to run unit tests
     if: github.repository == 'Project-MONAI/MONAI'
     strategy:
       matrix:
-        container: ["pytorch:22.04"]  # 21.02, 21.10 for backward comp.
+        container: ["pytorch:21.02", "pytorch:21.10", "pytorch:22.04"]  # 21.02, 21.10 for backward comp.
     container:
       image: nvcr.io/nvidia/${{ matrix.container }}-py3  # testing with the latest pytorch base image
       options: "--gpus all"
@@ -158,8 +157,8 @@ jobs:
           ls -al
 
           export LAUNCH_DELAY=$[ $RANDOM % 16 * 60 ]
-          # echo "Sleep $LAUNCH_DELAY"
-          # sleep $LAUNCH_DELAY
+          echo "Sleep $LAUNCH_DELAY"
+          sleep $LAUNCH_DELAY
           nvidia-smi
           export CUDA_VISIBLE_DEVICES=$(python -m tests.utils)
           echo $CUDA_VISIBLE_DEVICES
@@ -168,74 +167,74 @@ jobs:
           python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))"
 
           python -m pip install -r requirements-dev.txt
-          PYTHONPATH="$tmp_dir":$PYTHONPATH BUILD_MONAI=1 python ./tests/test_dynunet.py  # unit tests
+          PYTHONPATH="$tmp_dir":$PYTHONPATH BUILD_MONAI=1 python ./tests/runner.py -p 'test_((?!integration).)'  # unit tests
           if pgrep python; then pkill python; fi
 
-  # cron-docker:
-  #   if: github.repository == 'Project-MONAI/MONAI'
-  #   container:
-  #     image: docker://projectmonai/monai:latest  # this might be slow and has the pull count limitations
-  #     options: "--gpus all"
-  #   runs-on: [self-hosted, linux, x64, common]
-  #   steps:
-  #   - name: Run tests report coverage
-  #     # The docker image process has done the compilation.
-  #     # BUILD_MONAI=1 is necessary for triggering the USE_COMPILED flag.
-  #     run: |
-  #       cd /opt/monai
-  #       nvidia-smi
-  #       export CUDA_VISIBLE_DEVICES=$(python -m tests.utils)
-  #       echo $CUDA_VISIBLE_DEVICES
-  #       trap 'if pgrep python; then pkill python; fi;' ERR
-  #       python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null &
-  #       python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))"
-  #       python -c 'import torch; print(torch.rand(5,3, device=torch.device("cuda:0")))'
-  #       ngc --version
-  #       BUILD_MONAI=1 ./runtests.sh --build --coverage --pytype --unittests --disttests  # unit tests with pytype checks, coverage report
-  #       BUILD_MONAI=1 ./runtests.sh --build --coverage --net  # integration tests with coverage report
-  #       coverage xml
-  #       if pgrep python; then pkill python; fi
-  #   - name: Upload coverage
-  #     uses: codecov/codecov-action@v1
-  #     with:
-  #       fail_ci_if_error: false
-  #       file: ./coverage.xml
+  cron-docker:
+    if: github.repository == 'Project-MONAI/MONAI'
+    container:
+      image: docker://projectmonai/monai:latest  # this might be slow and has the pull count limitations
+      options: "--gpus all"
+    runs-on: [self-hosted, linux, x64, common]
+    steps:
+    - name: Run tests report coverage
+      # The docker image process has done the compilation.
+      # BUILD_MONAI=1 is necessary for triggering the USE_COMPILED flag.
+      run: |
+        cd /opt/monai
+        nvidia-smi
+        export CUDA_VISIBLE_DEVICES=$(python -m tests.utils)
+        echo $CUDA_VISIBLE_DEVICES
+        trap 'if pgrep python; then pkill python; fi;' ERR
+        python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null &
+        python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))"
+        python -c 'import torch; print(torch.rand(5,3, device=torch.device("cuda:0")))'
+        ngc --version
+        BUILD_MONAI=1 ./runtests.sh --build --coverage --pytype --unittests --disttests  # unit tests with pytype checks, coverage report
+        BUILD_MONAI=1 ./runtests.sh --build --coverage --net  # integration tests with coverage report
+        coverage xml
+        if pgrep python; then pkill python; fi
+    - name: Upload coverage
+      uses: codecov/codecov-action@v1
+      with:
+        fail_ci_if_error: false
+        file: ./coverage.xml
 
-  # cron-tutorial-notebooks:
-  #   if: github.repository == 'Project-MONAI/MONAI'
-  #   needs: cron-gpu  # so that monai itself is verified first
-  #   container:
-  #     image: nvcr.io/nvidia/pytorch:22.04-py3  # testing with the latest pytorch base image
-  #     options: "--gpus all --ipc=host"
-  #   runs-on: [self-hosted, linux, x64, common]
-  #   steps:
-  #   - uses: actions/checkout@v2
-  #   - name: Install MONAI
-  #     id: monai-install
-  #     run: |
-  #       which python
-  #       python -m pip install --upgrade pip wheel
-  #       python -m pip install -r requirements-dev.txt
-  #       BUILD_MONAI=1 python setup.py develop  # install monai
-  #       nvidia-smi
-  #       export CUDA_VISIBLE_DEVICES=$(python -m tests.utils)
-  #       echo $CUDA_VISIBLE_DEVICES
-  #       echo "::set-output name=devices::$CUDA_VISIBLE_DEVICES"
-  #   - name: Checkout tutorials and install their requirements
-  #     run: |
-  #       cd /opt
-  #       git clone --depth 1 --branch master --single-branch https://github.com/Project-MONAI/tutorials.git  # latest commit of master branch
-  #       cd tutorials
-  #       python -m pip install -r requirements.txt
-  #   - name: Run tutorial notebooks
-  #     timeout-minutes: 150
-  #     run: |
-  #       export CUDA_VISIBLE_DEVICES=${{ steps.monai-install.outputs.devices }}
-  #       echo $CUDA_VISIBLE_DEVICES
-  #       trap 'if pgrep python; then pkill python; fi;' ERR
-  #       python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null &
-  #       cd /opt/tutorials
-  #       python -c 'import monai; monai.config.print_debug_info()'
-  #       $(pwd)/runner.sh
-  #       python -c 'import monai; monai.config.print_debug_info()'
-  #       if pgrep python; then pkill python; fi
+  cron-tutorial-notebooks:
+    if: github.repository == 'Project-MONAI/MONAI'
+    needs: cron-gpu  # so that monai itself is verified first
+    container:
+      image: nvcr.io/nvidia/pytorch:22.04-py3  # testing with the latest pytorch base image
+      options: "--gpus all --ipc=host"
+    runs-on: [self-hosted, linux, x64, common]
+    steps:
+    - uses: actions/checkout@v2
+    - name: Install MONAI
+      id: monai-install
+      run: |
+        which python
+        python -m pip install --upgrade pip wheel
+        python -m pip install -r requirements-dev.txt
+        BUILD_MONAI=1 python setup.py develop  # install monai
+        nvidia-smi
+        export CUDA_VISIBLE_DEVICES=$(python -m tests.utils)
+        echo $CUDA_VISIBLE_DEVICES
+        echo "::set-output name=devices::$CUDA_VISIBLE_DEVICES"
+    - name: Checkout tutorials and install their requirements
+      run: |
+        cd /opt
+        git clone --depth 1 --branch master --single-branch https://github.com/Project-MONAI/tutorials.git  # latest commit of master branch
+        cd tutorials
+        python -m pip install -r requirements.txt
+    - name: Run tutorial notebooks
+      timeout-minutes: 150
+      run: |
+        export CUDA_VISIBLE_DEVICES=${{ steps.monai-install.outputs.devices }}
+        echo $CUDA_VISIBLE_DEVICES
+        trap 'if pgrep python; then pkill python; fi;' ERR
+        python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null &
+        cd /opt/tutorials
+        python -c 'import monai; monai.config.print_debug_info()'
+        $(pwd)/runner.sh
+        python -c 'import monai; monai.config.print_debug_info()'
+        if pgrep python; then pkill python; fi
\ No newline at end of file

From e8227bfd6814573498d8bf2f3d30e6d1c45b2b40 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 11 May 2022 03:47:33 +0000
Subject: [PATCH 10/12] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .github/workflows/cron.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml
index 4ef8652e97..08065147e5 100644
--- a/.github/workflows/cron.yml
+++ b/.github/workflows/cron.yml
@@ -237,4 +237,4 @@ jobs:
         python -c 'import monai; monai.config.print_debug_info()'
         $(pwd)/runner.sh
         python -c 'import monai; monai.config.print_debug_info()'
-        if pgrep python; then pkill python; fi
\ No newline at end of file
+        if pgrep python; then pkill python; fi

From c09a97d16ff5633fa4cdab19ab9250c8e7cb2e54 Mon Sep 17 00:00:00 2001
From: Yiheng Wang <vennw@nvidia.com>
Date: Wed, 11 May 2022 14:28:05 +0800
Subject: [PATCH 11/12] fix test on old PyTorch: use assert_allclose before 1.12

Signed-off-by: Yiheng Wang <vennw@nvidia.com>
---
 tests/test_dynunet.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tests/test_dynunet.py b/tests/test_dynunet.py
index a35ee36c95..ff5d5efbef 100644
--- a/tests/test_dynunet.py
+++ b/tests/test_dynunet.py
@@ -17,6 +17,7 @@
 
 from monai.networks import eval_mode
 from monai.networks.nets import DynUNet
+from monai.utils.module import pytorch_after
 from tests.utils import skip_if_no_cuda, skip_if_windows, test_script_save
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -141,7 +142,11 @@ def test_consistency(self, input_param, input_shape, _):
                         with eval_mode(net_fuser):
                             result_fuser = net_fuser(input_tensor)
 
-                        torch.testing.assert_close(result, result_fuser)
+                        # torch.testing.assert_allclose() is deprecated since 1.12 and will be removed in 1.14
+                        if pytorch_after(1, 12):
+                            torch.testing.assert_close(result, result_fuser)
+                        else:
+                            torch.testing.assert_allclose(result, result_fuser)
 
 
 class TestDynUNetDeepSupervision(unittest.TestCase):

From 2f559907620a6f854ce1dc0e47dbd61ddb6ec265 Mon Sep 17 00:00:00 2001
From: Yiheng Wang <vennw@nvidia.com>
Date: Wed, 11 May 2022 16:08:48 +0800
Subject: [PATCH 12/12] return nn.InstanceNorm3d directly if CUDA is unavailable

Signed-off-by: Yiheng Wang <vennw@nvidia.com>
---
 monai/networks/layers/factories.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monai/networks/layers/factories.py b/monai/networks/layers/factories.py
index 58a04b122b..89fe1912a5 100644
--- a/monai/networks/layers/factories.py
+++ b/monai/networks/layers/factories.py
@@ -268,7 +268,7 @@ def instance_nvfuser_factory(dim):
     # test InstanceNorm3dNVFuser installation with a basic example
     has_nvfuser_flag = has_nvfuser
     if not torch.cuda.is_available():
-        has_nvfuser_flag = False
+        return nn.InstanceNorm3d
     try:
         layer = InstanceNorm3dNVFuser(num_features=1, affine=True).to("cuda:0")
         inp = torch.randn([1, 1, 1, 1, 1]).to("cuda:0")