From 29bdcd1502438a09cb1213677408692034324eb7 Mon Sep 17 00:00:00 2001 From: Peixin Li Date: Tue, 26 Apr 2022 09:14:46 +0800 Subject: [PATCH 1/5] Support new temp dgx runner Signed-off-by: Peixin Li --- .github/workflows/cron.yml | 10 +++++----- .github/workflows/integration.yml | 2 +- .github/workflows/pythonapp-gpu.yml | 2 +- .github/workflows/setupapp.yml | 2 +- monai/data/wsi_reader.py | 27 ++++++++++----------------- 5 files changed, 18 insertions(+), 25 deletions(-) diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml index 734a84ff2f..d18d2dfc34 100644 --- a/.github/workflows/cron.yml +++ b/.github/workflows/cron.yml @@ -11,7 +11,7 @@ jobs: if: github.repository == 'Project-MONAI/MONAI' container: image: nvcr.io/nvidia/pytorch:21.06-py3 # CUDA 11.3 - options: "--gpus all" + options: "--gpus all --shm-size 2g" runs-on: [self-hosted, linux, x64, common] strategy: matrix: @@ -65,7 +65,7 @@ jobs: container: ["pytorch:21.02", "pytorch:21.10", "pytorch:22.03"] # 21.02, 21.10 for backward comp. container: image: nvcr.io/nvidia/${{ matrix.container }}-py3 # testing with the latest pytorch base image - options: "--gpus all" + options: "--gpus all --shm-size 2g" runs-on: [self-hosted, linux, x64, common] steps: - uses: actions/checkout@v2 @@ -109,7 +109,7 @@ jobs: container: ["pytorch:21.02", "pytorch:21.10", "pytorch:22.03"] # 21.02, 21.10 for backward comp. container: image: nvcr.io/nvidia/${{ matrix.container }}-py3 # testing with the latest pytorch base image - options: "--gpus all" + options: "--gpus all --shm-size 2g" runs-on: [self-hosted, linux, x64, common] steps: - uses: actions/checkout@v2 @@ -174,7 +174,7 @@ jobs: if: github.repository == 'Project-MONAI/MONAI' container: image: docker://projectmonai/monai:latest # this might be slow and has the pull count limitations - options: "--gpus all" + options: "--gpus all --shm-size 2g" runs-on: [self-hosted, linux, x64, common] steps: - name: Run tests report coverage @@ -205,7 +205,7 @@ jobs: needs: cron-gpu # so that monai itself is verified first container: image: nvcr.io/nvidia/pytorch:22.03-py3 # testing with the latest pytorch base image - options: "--gpus all --ipc=host" + options: "--gpus all --ipc=host --shm-size 2g" runs-on: [self-hosted, linux, x64, common] steps: - uses: actions/checkout@v2 diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index c38b66eda4..9b0d40440c 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -8,7 +8,7 @@ jobs: integration-py3: container: image: nvcr.io/nvidia/pytorch:21.12-py3 # CUDA 11.5 - options: --gpus all + options: --gpus all --shm-size 2g runs-on: [self-hosted, linux, x64, common] steps: # checkout the pull request branch diff --git a/.github/workflows/pythonapp-gpu.yml b/.github/workflows/pythonapp-gpu.yml index 90b31e99ab..6a39b1085f 100644 --- a/.github/workflows/pythonapp-gpu.yml +++ b/.github/workflows/pythonapp-gpu.yml @@ -57,7 +57,7 @@ jobs: base: "nvcr.io/nvidia/cuda:10.2-devel-ubuntu18.04" container: image: ${{ matrix.base }} - options: --gpus all + options: --gpus all --shm-size 2g runs-on: [self-hosted, linux, x64, common] steps: - uses: actions/checkout@v2 diff --git a/.github/workflows/setupapp.yml b/.github/workflows/setupapp.yml index 02911ea51f..e1f5b5d0e4 100644 --- a/.github/workflows/setupapp.yml +++ b/.github/workflows/setupapp.yml @@ -22,7 +22,7 @@ jobs: if: github.repository == 'Project-MONAI/MONAI' container: image: nvcr.io/nvidia/pytorch:21.06-py3 # CUDA 11.3 - options: --gpus all + options: --gpus all --shm-size 2g runs-on: [self-hosted, linux, x64, common] steps: - uses: actions/checkout@v2 diff --git a/monai/data/wsi_reader.py b/monai/data/wsi_reader.py index ad5141787c..02032a0ae6 100644 --- a/monai/data/wsi_reader.py +++ b/monai/data/wsi_reader.py @@ -180,7 +180,16 @@ def get_data( f"The image dimension should be 3 but has {patch.ndim}. " "`WSIReader` is designed to work only with 2D images with color channel." ) - + # Check if there are four color channels for RGBA + if mode == "RGBA" and patch.shape[0] != 4: + raise ValueError( + f"The image is expected to have four color channels in '{mode}' mode but has {patch.shape[0]}." + ) + # Check if there are three color channels for RGB + elif mode in "RGB" and patch.shape[0] != 3: + raise ValueError( + f"The image is expected to have three color channels in '{mode}' mode but has {patch.shape[0]}. " + ) # Create a list of patches patch_list.append(patch) @@ -408,11 +417,6 @@ def get_patch( patch = AsChannelFirst()(patch) # type: ignore # Check if the color channel is 3 (RGB) or 4 (RGBA) - if mode == "RGBA" and patch.shape[0] != 4: - raise ValueError( - f"The image is expected to have four color channels in '{mode}' mode but has {patch.shape[0]}." - ) - if mode in "RGB": if patch.shape[0] not in [3, 4]: raise ValueError( @@ -537,15 +541,4 @@ def get_patch( # Make it channel first patch = AsChannelFirst()(patch) # type: ignore - # Check if the color channel is 3 (RGB) or 4 (RGBA) - if mode == "RGBA" and patch.shape[0] != 4: - raise ValueError( - f"The image is expected to have four color channels in '{mode}' mode but has {patch.shape[0]}." - ) - - elif mode in "RGB" and patch.shape[0] != 3: - raise ValueError( - f"The image is expected to have three color channels in '{mode}' mode but has {patch.shape[0]}. " - ) - return patch From 9ad18a521934a8efeacdfe417ba68dcfd03838ad Mon Sep 17 00:00:00 2001 From: Wenqi Li Date: Tue, 26 Apr 2022 09:34:02 +0100 Subject: [PATCH 2/5] atol 1e-5 Signed-off-by: Wenqi Li --- tests/test_add_extreme_points_channeld.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_add_extreme_points_channeld.py b/tests/test_add_extreme_points_channeld.py index 39d221596f..f9837e9ef4 100644 --- a/tests/test_add_extreme_points_channeld.py +++ b/tests/test_add_extreme_points_channeld.py @@ -68,7 +68,7 @@ def test_correct_results(self, input_data, expected): keys="img", label_key="label", sigma=1.0, rescale_min=0.0, rescale_max=1.0 ) result = add_extreme_points_channel(input_data) - assert_allclose(result["img"][IMG_CHANNEL], expected, rtol=1e-4) + assert_allclose(result["img"][IMG_CHANNEL], expected, rtol=1e-4, atol=1e-5) if __name__ == "__main__": From 071abcf80d30870eb4b3f58d214d50d7c85ff2ae Mon Sep 17 00:00:00 2001 From: Wenqi Li Date: Tue, 26 Apr 2022 10:29:09 +0100 Subject: [PATCH 3/5] temp tests Signed-off-by: Wenqi Li --- .github/workflows/integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 9b0d40440c..c38b66eda4 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -8,7 +8,7 @@ jobs: integration-py3: container: image: nvcr.io/nvidia/pytorch:21.12-py3 # CUDA 11.5 - options: --gpus all --shm-size 2g + options: --gpus all runs-on: [self-hosted, linux, x64, common] steps: # checkout the pull request branch From 5852cc5689359dc6fff1fe9fa9c57908b281d5ba Mon Sep 17 00:00:00 2001 From: Wenqi Li Date: Tue, 26 Apr 2022 11:06:04 +0100 Subject: [PATCH 4/5] atol 1e-5 Signed-off-by: Wenqi Li --- tests/test_add_extreme_points_channel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_add_extreme_points_channel.py b/tests/test_add_extreme_points_channel.py index d2c8a627b6..116f96126f 100644 --- a/tests/test_add_extreme_points_channel.py +++ b/tests/test_add_extreme_points_channel.py @@ -71,7 +71,7 @@ class TestAddExtremePointsChannel(unittest.TestCase): def test_correct_results(self, input_data, expected): add_extreme_points_channel = AddExtremePointsChannel() result = add_extreme_points_channel(**input_data) - assert_allclose(result[IMG_CHANNEL], expected, rtol=1e-4) + assert_allclose(result[IMG_CHANNEL], expected, rtol=1e-4, atol=1e-5) if __name__ == "__main__": From dc1af5e63751b2f1f78f5931ded8dfa4c223c90e Mon Sep 17 00:00:00 2001 From: Wenqi Li Date: Tue, 26 Apr 2022 12:31:29 +0100 Subject: [PATCH 5/5] update to default shm-size 4g Signed-off-by: Wenqi Li --- .github/workflows/cron.yml | 10 +++++----- .github/workflows/integration.yml | 2 +- .github/workflows/pythonapp-gpu.yml | 2 +- .github/workflows/setupapp.yml | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml index d18d2dfc34..734a84ff2f 100644 --- a/.github/workflows/cron.yml +++ b/.github/workflows/cron.yml @@ -11,7 +11,7 @@ jobs: if: github.repository == 'Project-MONAI/MONAI' container: image: nvcr.io/nvidia/pytorch:21.06-py3 # CUDA 11.3 - options: "--gpus all --shm-size 2g" + options: "--gpus all" runs-on: [self-hosted, linux, x64, common] strategy: matrix: @@ -65,7 +65,7 @@ jobs: container: ["pytorch:21.02", "pytorch:21.10", "pytorch:22.03"] # 21.02, 21.10 for backward comp. container: image: nvcr.io/nvidia/${{ matrix.container }}-py3 # testing with the latest pytorch base image - options: "--gpus all --shm-size 2g" + options: "--gpus all" runs-on: [self-hosted, linux, x64, common] steps: - uses: actions/checkout@v2 @@ -109,7 +109,7 @@ jobs: container: ["pytorch:21.02", "pytorch:21.10", "pytorch:22.03"] # 21.02, 21.10 for backward comp. container: image: nvcr.io/nvidia/${{ matrix.container }}-py3 # testing with the latest pytorch base image - options: "--gpus all --shm-size 2g" + options: "--gpus all" runs-on: [self-hosted, linux, x64, common] steps: - uses: actions/checkout@v2 @@ -174,7 +174,7 @@ jobs: if: github.repository == 'Project-MONAI/MONAI' container: image: docker://projectmonai/monai:latest # this might be slow and has the pull count limitations - options: "--gpus all --shm-size 2g" + options: "--gpus all" runs-on: [self-hosted, linux, x64, common] steps: - name: Run tests report coverage @@ -205,7 +205,7 @@ jobs: needs: cron-gpu # so that monai itself is verified first container: image: nvcr.io/nvidia/pytorch:22.03-py3 # testing with the latest pytorch base image - options: "--gpus all --ipc=host --shm-size 2g" + options: "--gpus all --ipc=host" runs-on: [self-hosted, linux, x64, common] steps: - uses: actions/checkout@v2 diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index c38b66eda4..767b58a792 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -8,7 +8,7 @@ jobs: integration-py3: container: image: nvcr.io/nvidia/pytorch:21.12-py3 # CUDA 11.5 - options: --gpus all + options: --gpus all # shm-size 4g works fine runs-on: [self-hosted, linux, x64, common] steps: # checkout the pull request branch diff --git a/.github/workflows/pythonapp-gpu.yml b/.github/workflows/pythonapp-gpu.yml index 6a39b1085f..90b31e99ab 100644 --- a/.github/workflows/pythonapp-gpu.yml +++ b/.github/workflows/pythonapp-gpu.yml @@ -57,7 +57,7 @@ jobs: base: "nvcr.io/nvidia/cuda:10.2-devel-ubuntu18.04" container: image: ${{ matrix.base }} - options: --gpus all --shm-size 2g + options: --gpus all runs-on: [self-hosted, linux, x64, common] steps: - uses: actions/checkout@v2 diff --git a/.github/workflows/setupapp.yml b/.github/workflows/setupapp.yml index e1f5b5d0e4..02911ea51f 100644 --- a/.github/workflows/setupapp.yml +++ b/.github/workflows/setupapp.yml @@ -22,7 +22,7 @@ jobs: if: github.repository == 'Project-MONAI/MONAI' container: image: nvcr.io/nvidia/pytorch:21.06-py3 # CUDA 11.3 - options: --gpus all --shm-size 2g + options: --gpus all runs-on: [self-hosted, linux, x64, common] steps: - uses: actions/checkout@v2