From 96d8e5f0400e6ee168c6cda2864be4280a491231 Mon Sep 17 00:00:00 2001 From: Wenqi Li Date: Tue, 2 Feb 2021 22:19:36 +0000 Subject: [PATCH 1/5] test new image Signed-off-by: Wenqi Li --- .github/workflows/cron.yml | 119 ++++++++++++++++++++++++------------- 1 file changed, 77 insertions(+), 42 deletions(-) diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml index d9ffdb7f5e..90e1e3a57c 100644 --- a/.github/workflows/cron.yml +++ b/.github/workflows/cron.yml @@ -3,36 +3,71 @@ name: crons on: schedule: - cron: "0 2 * * *" # at 02:00 UTC + push: + branches: + - test-20-12 jobs: - cron-gpu: + # cron-gpu: + # if: github.repository == 'Project-MONAI/MONAI' + # container: + # image: nvcr.io/nvidia/pytorch:20.03-py3 # CUDA 10.2 + # options: "--gpus all" + # runs-on: [self-hosted, linux, x64, common] + # strategy: + # matrix: + # pytorch-version: [1.5.0, 1.5.1, 1.6.0, latest] + # steps: + # - uses: actions/checkout@v2 + # - name: Install the dependencies + # run: | + # which python + # python -m pip install --upgrade pip wheel + # python -m pip uninstall -y torch torchvision + # if [ ${{ matrix.pytorch-version }} == "latest" ]; then + # python -m pip install torch torchvision + # elif [ ${{ matrix.pytorch-version }} == "1.5.0" ]; then + # python -m pip install torch==1.5.0 + # python -m pip install torchvision==0.6.0 + # elif [ ${{ matrix.pytorch-version }} == "1.5.1" ]; then + # python -m pip install torch==1.5.1 + # python -m pip install torchvision==0.6.1 + # elif [ ${{ matrix.pytorch-version }} == "1.6.0" ]; then + # python -m pip install torch==1.6.0 + # python -m pip install torchvision==0.7.0 + # fi + # python -m pip install -r requirements-dev.txt + # python -m pip list + # - name: Run tests report coverage + # run: | + # export LAUNCH_DELAY=$[ $RANDOM % 16 * 60 ] + # echo "Sleep $LAUNCH_DELAY" + # sleep $LAUNCH_DELAY + # nvidia-smi + # export CUDA_VISIBLE_DEVICES=$(python -m tests.utils) + # echo $CUDA_VISIBLE_DEVICES + # python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))" + # python -c 'import torch; print(torch.rand(5,3, device=torch.device("cuda:0")))' + # BUILD_MONAI=1 ./runtests.sh --coverage + # coverage xml + # - name: Upload coverage + # uses: codecov/codecov-action@v1 + # with: + # fail_ci_if_error: false + # file: ./coverage.xml + + cron-pt-image: if: github.repository == 'Project-MONAI/MONAI' container: - image: nvcr.io/nvidia/pytorch:20.03-py3 # CUDA 10.2 + image: nvcr.io/nvidia/pytorch:20.12-py3 options: "--gpus all" runs-on: [self-hosted, linux, x64, common] - strategy: - matrix: - pytorch-version: [1.5.0, 1.5.1, 1.6.0, latest] steps: - uses: actions/checkout@v2 - name: Install the dependencies run: | which python python -m pip install --upgrade pip wheel - python -m pip uninstall -y torch torchvision - if [ ${{ matrix.pytorch-version }} == "latest" ]; then - python -m pip install torch torchvision - elif [ ${{ matrix.pytorch-version }} == "1.5.0" ]; then - python -m pip install torch==1.5.0 - python -m pip install torchvision==0.6.0 - elif [ ${{ matrix.pytorch-version }} == "1.5.1" ]; then - python -m pip install torch==1.5.1 - python -m pip install torchvision==0.6.1 - elif [ ${{ matrix.pytorch-version }} == "1.6.0" ]; then - python -m pip install torch==1.6.0 - python -m pip install torchvision==0.7.0 - fi python -m pip install -r requirements-dev.txt python -m pip list - name: Run tests report coverage @@ -53,27 +88,27 @@ jobs: fail_ci_if_error: false file: ./coverage.xml - cron-docker: - if: github.repository == 'Project-MONAI/MONAI' - container: - image: localhost:5000/local_monai:dockerhub # use currently latest, locally available dockerhub image - options: "--gpus all" - runs-on: [self-hosted, linux, x64, common] - steps: - - name: Run tests report coverage - # The docker image process has done the compilation. BUILD_MONAI=1 may not be necessary. - run: | - cd /opt/monai - nvidia-smi - export CUDA_VISIBLE_DEVICES=$(python -m tests.utils) - echo $CUDA_VISIBLE_DEVICES - python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))" - python -c 'import torch; print(torch.rand(5,3, device=torch.device("cuda:0")))' - ngc --version - BUILD_MONAI=1 ./runtests.sh --coverage --pytype - coverage xml - - name: Upload coverage - uses: codecov/codecov-action@v1 - with: - fail_ci_if_error: false - file: ./coverage.xml + # cron-docker: + # if: github.repository == 'Project-MONAI/MONAI' + # container: + # image: localhost:5000/local_monai:dockerhub # use currently latest, locally available dockerhub image + # options: "--gpus all" + # runs-on: [self-hosted, linux, x64, common] + # steps: + # - name: Run tests report coverage + # # The docker image process has done the compilation. BUILD_MONAI=1 may not be necessary. + # run: | + # cd /opt/monai + # nvidia-smi + # export CUDA_VISIBLE_DEVICES=$(python -m tests.utils) + # echo $CUDA_VISIBLE_DEVICES + # python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))" + # python -c 'import torch; print(torch.rand(5,3, device=torch.device("cuda:0")))' + # ngc --version + # BUILD_MONAI=1 ./runtests.sh --coverage --pytype + # coverage xml + # - name: Upload coverage + # uses: codecov/codecov-action@v1 + # with: + # fail_ci_if_error: false + # file: ./coverage.xml From 75f20b4af2e0b2ea5d85574d5d4459e2234e5ee2 Mon Sep 17 00:00:00 2001 From: Wenqi Li Date: Tue, 2 Feb 2021 23:29:19 +0000 Subject: [PATCH 2/5] temp no cron Signed-off-by: Wenqi Li --- .github/workflows/cron.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml index 90e1e3a57c..82dca178eb 100644 --- a/.github/workflows/cron.yml +++ b/.github/workflows/cron.yml @@ -1,8 +1,8 @@ name: crons on: - schedule: - - cron: "0 2 * * *" # at 02:00 UTC + # schedule: + # - cron: "0 2 * * *" # at 02:00 UTC push: branches: - test-20-12 From 6a7505eb95363cebb48ebf50dc5a8d03eb883dc4 Mon Sep 17 00:00:00 2001 From: Wenqi Li Date: Mon, 22 Feb 2021 13:01:20 -0500 Subject: [PATCH 3/5] fixes unit tests Signed-off-by: Wenqi Li --- tests/test_ensemble_evaluator.py | 2 +- tests/test_handler_checkpoint_loader.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_ensemble_evaluator.py b/tests/test_ensemble_evaluator.py index fdb9695476..9cc977d876 100644 --- a/tests/test_ensemble_evaluator.py +++ b/tests/test_ensemble_evaluator.py @@ -55,7 +55,7 @@ def forward(self, x): def run_post_transform(engine): for i in range(5): expected_value = engine.state.iteration + i - torch.testing.assert_allclose(engine.state.output[f"pred{i}"], expected_value) + torch.testing.assert_allclose(engine.state.output[f"pred{i}"], torch.tensor([[expected_value]])) val_engine.run() diff --git a/tests/test_handler_checkpoint_loader.py b/tests/test_handler_checkpoint_loader.py index d299b65e9b..8b0f752ff4 100644 --- a/tests/test_handler_checkpoint_loader.py +++ b/tests/test_handler_checkpoint_loader.py @@ -40,7 +40,7 @@ def test_one_save_one_load(self): engine = Engine(lambda e, b: None) CheckpointLoader(load_path=path, load_dict={"net": net2}).attach(engine) engine.run([0] * 8, max_epochs=1) - torch.testing.assert_allclose(net2.state_dict()["weight"], 0.1) + torch.testing.assert_allclose(net2.state_dict()["weight"], torch.tensor([0.1])) def test_two_save_one_load(self): logging.basicConfig(stream=sys.stdout, level=logging.INFO) @@ -62,7 +62,7 @@ def test_two_save_one_load(self): engine = Engine(lambda e, b: None) CheckpointLoader(load_path=path, load_dict={"net": net2}).attach(engine) engine.run([0] * 8, max_epochs=1) - torch.testing.assert_allclose(net2.state_dict()["weight"], 0.1) + torch.testing.assert_allclose(net2.state_dict()["weight"], torch.tensor([0.1])) def test_save_single_device_load_multi_devices(self): logging.basicConfig(stream=sys.stdout, level=logging.INFO) @@ -83,7 +83,7 @@ def test_save_single_device_load_multi_devices(self): engine = Engine(lambda e, b: None) CheckpointLoader(load_path=path, load_dict={"net": net2}).attach(engine) engine.run([0] * 8, max_epochs=1) - torch.testing.assert_allclose(net2.state_dict()["module.weight"].cpu(), 0.1) + torch.testing.assert_allclose(net2.state_dict()["module.weight"].cpu(), torch.tensor([0.1])) if __name__ == "__main__": From 1f4a9857ea0a93768bd8b05adeec97493bd5f091 Mon Sep 17 00:00:00 2001 From: Wenqi Li Date: Mon, 22 Feb 2021 18:16:59 +0000 Subject: [PATCH 4/5] adds base docker test Signed-off-by: Wenqi Li --- .github/workflows/cron.yml | 145 +++++++++++++++++++------------------ Dockerfile | 2 +- 2 files changed, 74 insertions(+), 73 deletions(-) diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml index 82dca178eb..e00492c223 100644 --- a/.github/workflows/cron.yml +++ b/.github/workflows/cron.yml @@ -8,58 +8,58 @@ on: - test-20-12 jobs: - # cron-gpu: - # if: github.repository == 'Project-MONAI/MONAI' - # container: - # image: nvcr.io/nvidia/pytorch:20.03-py3 # CUDA 10.2 - # options: "--gpus all" - # runs-on: [self-hosted, linux, x64, common] - # strategy: - # matrix: - # pytorch-version: [1.5.0, 1.5.1, 1.6.0, latest] - # steps: - # - uses: actions/checkout@v2 - # - name: Install the dependencies - # run: | - # which python - # python -m pip install --upgrade pip wheel - # python -m pip uninstall -y torch torchvision - # if [ ${{ matrix.pytorch-version }} == "latest" ]; then - # python -m pip install torch torchvision - # elif [ ${{ matrix.pytorch-version }} == "1.5.0" ]; then - # python -m pip install torch==1.5.0 - # python -m pip install torchvision==0.6.0 - # elif [ ${{ matrix.pytorch-version }} == "1.5.1" ]; then - # python -m pip install torch==1.5.1 - # python -m pip install torchvision==0.6.1 - # elif [ ${{ matrix.pytorch-version }} == "1.6.0" ]; then - # python -m pip install torch==1.6.0 - # python -m pip install torchvision==0.7.0 - # fi - # python -m pip install -r requirements-dev.txt - # python -m pip list - # - name: Run tests report coverage - # run: | - # export LAUNCH_DELAY=$[ $RANDOM % 16 * 60 ] - # echo "Sleep $LAUNCH_DELAY" - # sleep $LAUNCH_DELAY - # nvidia-smi - # export CUDA_VISIBLE_DEVICES=$(python -m tests.utils) - # echo $CUDA_VISIBLE_DEVICES - # python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))" - # python -c 'import torch; print(torch.rand(5,3, device=torch.device("cuda:0")))' - # BUILD_MONAI=1 ./runtests.sh --coverage - # coverage xml - # - name: Upload coverage - # uses: codecov/codecov-action@v1 - # with: - # fail_ci_if_error: false - # file: ./coverage.xml + cron-gpu: + if: github.repository == 'Project-MONAI/MONAI' + container: + image: nvcr.io/nvidia/pytorch:20.03-py3 # CUDA 10.2 + options: "--gpus all" + runs-on: [self-hosted, linux, x64, common] + strategy: + matrix: + pytorch-version: [1.5.0, 1.5.1, 1.6.0, latest] + steps: + - uses: actions/checkout@v2 + - name: Install the dependencies + run: | + which python + python -m pip install --upgrade pip wheel + python -m pip uninstall -y torch torchvision + if [ ${{ matrix.pytorch-version }} == "latest" ]; then + python -m pip install torch torchvision + elif [ ${{ matrix.pytorch-version }} == "1.5.0" ]; then + python -m pip install torch==1.5.0 + python -m pip install torchvision==0.6.0 + elif [ ${{ matrix.pytorch-version }} == "1.5.1" ]; then + python -m pip install torch==1.5.1 + python -m pip install torchvision==0.6.1 + elif [ ${{ matrix.pytorch-version }} == "1.6.0" ]; then + python -m pip install torch==1.6.0 + python -m pip install torchvision==0.7.0 + fi + python -m pip install -r requirements-dev.txt + python -m pip list + - name: Run tests report coverage + run: | + export LAUNCH_DELAY=$[ $RANDOM % 16 * 60 ] + echo "Sleep $LAUNCH_DELAY" + sleep $LAUNCH_DELAY + nvidia-smi + export CUDA_VISIBLE_DEVICES=$(python -m tests.utils) + echo $CUDA_VISIBLE_DEVICES + python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))" + python -c 'import torch; print(torch.rand(5,3, device=torch.device("cuda:0")))' + BUILD_MONAI=1 ./runtests.sh --coverage + coverage xml + - name: Upload coverage + uses: codecov/codecov-action@v1 + with: + fail_ci_if_error: false + file: ./coverage.xml cron-pt-image: if: github.repository == 'Project-MONAI/MONAI' container: - image: nvcr.io/nvidia/pytorch:20.12-py3 + image: nvcr.io/nvidia/pytorch:20.12-py3 # testing with the latest pytorch base image options: "--gpus all" runs-on: [self-hosted, linux, x64, common] steps: @@ -88,27 +88,28 @@ jobs: fail_ci_if_error: false file: ./coverage.xml - # cron-docker: - # if: github.repository == 'Project-MONAI/MONAI' - # container: - # image: localhost:5000/local_monai:dockerhub # use currently latest, locally available dockerhub image - # options: "--gpus all" - # runs-on: [self-hosted, linux, x64, common] - # steps: - # - name: Run tests report coverage - # # The docker image process has done the compilation. BUILD_MONAI=1 may not be necessary. - # run: | - # cd /opt/monai - # nvidia-smi - # export CUDA_VISIBLE_DEVICES=$(python -m tests.utils) - # echo $CUDA_VISIBLE_DEVICES - # python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))" - # python -c 'import torch; print(torch.rand(5,3, device=torch.device("cuda:0")))' - # ngc --version - # BUILD_MONAI=1 ./runtests.sh --coverage --pytype - # coverage xml - # - name: Upload coverage - # uses: codecov/codecov-action@v1 - # with: - # fail_ci_if_error: false - # file: ./coverage.xml + cron-docker: + if: github.repository == 'Project-MONAI/MONAI' + container: + image: localhost:5000/local_monai:dockerhub # use currently latest, locally available dockerhub image + options: "--gpus all" + runs-on: [self-hosted, linux, x64, common] + steps: + - name: Run tests report coverage + # The docker image process has done the compilation. + # BUILD_MONAI=1 is necessary for triggering the USE_COMPILED flag. + run: | + cd /opt/monai + nvidia-smi + export CUDA_VISIBLE_DEVICES=$(python -m tests.utils) + echo $CUDA_VISIBLE_DEVICES + python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))" + python -c 'import torch; print(torch.rand(5,3, device=torch.device("cuda:0")))' + ngc --version + BUILD_MONAI=1 ./runtests.sh --coverage --pytype + coverage xml + - name: Upload coverage + uses: codecov/codecov-action@v1 + with: + fail_ci_if_error: false + file: ./coverage.xml diff --git a/Dockerfile b/Dockerfile index a600f9de84..d0384c7bee 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:20.10-py3 +ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:20.12-py3 FROM ${PYTORCH_IMAGE} From f707b80938558cc9930e09e28cec4e8f21d89950 Mon Sep 17 00:00:00 2001 From: Wenqi Li Date: Mon, 22 Feb 2021 18:27:44 +0000 Subject: [PATCH 5/5] undo temp tests Signed-off-by: Wenqi Li --- .github/workflows/cron.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml index e00492c223..e568ba9e15 100644 --- a/.github/workflows/cron.yml +++ b/.github/workflows/cron.yml @@ -1,11 +1,8 @@ name: crons on: - # schedule: - # - cron: "0 2 * * *" # at 02:00 UTC - push: - branches: - - test-20-12 + schedule: + - cron: "0 2 * * *" # at 02:00 UTC jobs: cron-gpu: