From 7f221ff5c113948656628d7205e77cf27c5859a0 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Mon, 2 May 2022 18:58:06 +0000 Subject: [PATCH 01/30] Implement SlidingPatchWSIDataset Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/__init__.py | 2 +- monai/data/wsi_datasets.py | 99 +++++++++++++++++++++++++++++++++++--- 2 files changed, 92 insertions(+), 9 deletions(-) diff --git a/monai/data/__init__.py b/monai/data/__init__.py index d9af568508..59532e4d8d 100644 --- a/monai/data/__init__.py +++ b/monai/data/__init__.py @@ -87,5 +87,5 @@ worker_init_fn, zoom_affine, ) -from .wsi_datasets import PatchWSIDataset +from .wsi_datasets import PatchWSIDataset, SlidingPatchWSIDataset from .wsi_reader import BaseWSIReader, CuCIMWSIReader, OpenSlideWSIReader, WSIReader diff --git a/monai/data/wsi_datasets.py b/monai/data/wsi_datasets.py index a895e8aa45..eea6790be8 100644 --- a/monai/data/wsi_datasets.py +++ b/monai/data/wsi_datasets.py @@ -10,7 +10,8 @@ # limitations under the License. import inspect -from typing import Callable, Dict, List, Optional, Tuple, Union +from itertools import product +from typing import Callable, Dict, Optional, Sequence, Tuple, Union import numpy as np @@ -19,7 +20,7 @@ from monai.transforms import apply_transform from monai.utils import ensure_tuple_rep -__all__ = ["PatchWSIDataset"] +__all__ = ["PatchWSIDataset", "SlidingPatchWSIDataset"] class PatchWSIDataset(Dataset): @@ -32,10 +33,12 @@ class PatchWSIDataset(Dataset): size: the size of patch to be extracted from the whole slide image. level: the level at which the patches to be extracted (default to 0). transform: transforms to be executed on input data. - reader: the module to be used for loading whole slide imaging, - - if `reader` is a string, it defines the backend of `monai.data.WSIReader`. Defaults to cuCIM. - - if `reader` is a class (inherited from `BaseWSIReader`), it is initialized and set as wsi_reader. 
- - if `reader` is an instance of a a class inherited from `BaseWSIReader`, it is set as the wsi_reader. + reader: the module to be used for loading whole slide imaging. If `reader` is + + - a string, it defines the backend of `monai.data.WSIReader`. Defaults to cuCIM. + - a class (inherited from `BaseWSIReader`), it is initialized and set as wsi_reader. + - an instance of a a class inherited from `BaseWSIReader`, it is set as the wsi_reader. + kwargs: additional arguments to pass to `WSIReader` or provided whole slide reader class Note: @@ -45,14 +48,14 @@ class PatchWSIDataset(Dataset): [ {"image": "path/to/image1.tiff", "location": [200, 500], "label": 0}, - {"image": "path/to/image2.tiff", "location": [100, 700], "label": 1} + {"image": "path/to/image2.tiff", "location": [100, 700], "size": [20, 20], "level": 2, "label": 1} ] """ def __init__( self, - data: List, + data: Sequence, size: Optional[Union[int, Tuple[int, int]]] = None, level: Optional[int] = None, transform: Optional[Callable] = None, @@ -133,3 +136,83 @@ def _transform(self, index: int): # Create put all patch information together and apply transforms patch = {"image": image, "label": label, "metadata": metadata} return apply_transform(self.transform, patch) if self.transform else patch + + +class SlidingPatchWSIDataset(PatchWSIDataset): + """ + This dataset extracts patches from whole slide images (without loading the whole image) + It also reads labels for each patch and provides each patch with its associated class labels. + + Args: + data: the list of input samples including image, location, and label (see the note below for more details). + size: the size of patch to be extracted from the whole slide image. + level: the level at which the patches to be extracted (default to 0). + transform: transforms to be executed on input data. + reader: the module to be used for loading whole slide imaging. Defaults to cuCIM. If `reader` is + + - a string, it defines the backend of `monai.data.WSIReader`. 
+ - a class (inherited from `BaseWSIReader`), it is initialized and set as wsi_reader, + - an instance of a a class inherited from `BaseWSIReader`, it is set as the wsi_reader. + + kwargs: additional arguments to pass to `WSIReader` or provided whole slide reader class + + Note: + The input data has the following form as an example: + + .. code-block:: python + + [ + {"image": "path/to/image1.tiff"}, + {"image": "path/to/image2.tiff", "size": [20, 20], "level": 2} + ] + + """ + + def __init__( + self, + data: Sequence, + size: Optional[Union[int, Tuple[int, int]]] = None, + level: Optional[int] = None, + overlap: float = 0, + transform: Optional[Callable] = None, + reader="cuCIM", + **kwargs, + ): + super().__init__(data=data, size=size, level=level, transform=transform, reader=reader, **kwargs) + self.overlap = overlap + + # Create single sample for each patch (in a sliding window manner) + self.data = [] + for sample in data: + prepared_sample = self._make_patches(sample) + self.data.extend(prepared_sample) + + def _make_patches(self, sample): + """Define the location for each patch based on sliding-window approach""" + wsi_obj = self._get_wsi_object(sample) + wsi_size = wsi_obj.get_size(0) + + patch_size = self._get_size(sample) + level = self._get_level(sample) + ratio = 1.0 + if level > 0: + wsi_size_at_level = wsi_obj.get_size(level) + ratio = [wsi_size[i] / wsi_size_at_level[i] for i in range(len(self.size))] + + steps = (int(patch_size[i] * ratio[i] * self.overlap) for i in range(len(self.size))) + locations = product(range(0, wsi_size[i], steps[i]) for i in range(len(self.size))) + + sample["size"] = patch_size + sample["level"] = level + n_patches = len(locations) + return [{**sample, "location": locations[i], "patch_num": i, "n_patches": n_patches} for i in range(n_patches)] + + def _transform(self, index: int): + # Get a single entry of data + sample: Dict = self.data[index] + # Extract patch image and associated metadata + image, metadata = 
self._get_data(sample) + + # Create put all patch information together and apply transforms + patch = {"image": image, "metadata": metadata} + return apply_transform(self.transform, patch) if self.transform else patch From b011f41296e1ddd57854f4c687c43989500b3057 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Mon, 2 May 2022 18:58:21 +0000 Subject: [PATCH 02/30] Update docs Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- docs/source/data.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/source/data.rst b/docs/source/data.rst index 02e8031117..b60fb7711f 100644 --- a/docs/source/data.rst +++ b/docs/source/data.rst @@ -311,3 +311,8 @@ PatchWSIDataset ~~~~~~~~~~~~~~~ .. autoclass:: monai.data.PatchWSIDataset :members: + +SlidingPatchWSIDataset +~~~~~~~~~~~~~~~~~~~~~~ +.. autoclass:: monai.data.SlidingPatchWSIDataset + :members: From 3830551907f1794a2e9ef6b12bf6d621bb83a990 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Fri, 6 May 2022 15:20:28 +0000 Subject: [PATCH 03/30] Fix a typo in skip message Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- tests/test_patch_wsi_dataset_new.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_patch_wsi_dataset_new.py b/tests/test_patch_wsi_dataset_new.py index 0be30536de..d128d45262 100644 --- a/tests/test_patch_wsi_dataset_new.py +++ b/tests/test_patch_wsi_dataset_new.py @@ -158,7 +158,7 @@ def setUpClass(cls): cls.backend = "cucim" -@skipUnless(has_osl, "Requires cucim") +@skipUnless(has_osl, "Requires openslide") class TestPatchWSIDatasetOpenSlide(PatchWSIDatasetTests.Tests): @classmethod def setUpClass(cls): From ba20427c777cf9278f386778ad863783b8ee3193 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Fri, 6 May 2022 18:47:18 +0000 Subject: [PATCH 04/30] Update patch/steps at different levels Signed-off-by: Behrooz 
<3968947+drbeh@users.noreply.github.com> --- monai/data/wsi_datasets.py | 26 +++++++++++++++----------- monai/data/wsi_reader.py | 2 +- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/monai/data/wsi_datasets.py b/monai/data/wsi_datasets.py index eea6790be8..aaca76f06b 100644 --- a/monai/data/wsi_datasets.py +++ b/monai/data/wsi_datasets.py @@ -190,29 +190,33 @@ def __init__( def _make_patches(self, sample): """Define the location for each patch based on sliding-window approach""" wsi_obj = self._get_wsi_object(sample) - wsi_size = wsi_obj.get_size(0) + wsi_size = self.wsi_reader.get_size(wsi_obj, 0) - patch_size = self._get_size(sample) + patch_size_level = self._get_size(sample) level = self._get_level(sample) - ratio = 1.0 + ratio = [1.0] * len(patch_size_level) if level > 0: - wsi_size_at_level = wsi_obj.get_size(level) - ratio = [wsi_size[i] / wsi_size_at_level[i] for i in range(len(self.size))] - - steps = (int(patch_size[i] * ratio[i] * self.overlap) for i in range(len(self.size))) - locations = product(range(0, wsi_size[i], steps[i]) for i in range(len(self.size))) - - sample["size"] = patch_size + wsi_size_at_level = self.wsi_reader.get_size(wsi_obj, level) + ratio = [wsi_size[i] / wsi_size_at_level[i] for i in range(len(patch_size_level))] + + patch_size = [int(patch_size_level[i] * ratio[i]) for i in range(len(patch_size_level))] + steps = [int(patch_size[i] * (1.0 - self.overlap)) for i in range(len(patch_size_level))] + locations = list( + product(*[list(range(0, wsi_size[i] - patch_size[i] + 1, steps[i])) for i in range(len(patch_size_level))]) + ) + sample["size"] = patch_size_level sample["level"] = level n_patches = len(locations) return [{**sample, "location": locations[i], "patch_num": i, "n_patches": n_patches} for i in range(n_patches)] + def _get_location(self, sample: Dict): + return sample["location"] + def _transform(self, index: int): # Get a single entry of data sample: Dict = self.data[index] # Extract patch image and 
associated metadata image, metadata = self._get_data(sample) - # Create put all patch information together and apply transforms patch = {"image": image, "metadata": metadata} return apply_transform(self.transform, patch) if self.transform else patch diff --git a/monai/data/wsi_reader.py b/monai/data/wsi_reader.py index 02032a0ae6..2b1863e321 100644 --- a/monai/data/wsi_reader.py +++ b/monai/data/wsi_reader.py @@ -158,7 +158,7 @@ def get_data( # Verify location if location is None: location = (0, 0) - wsi_size = self.get_size(each_wsi, level) + wsi_size = self.get_size(each_wsi, 0) if location[0] > wsi_size[0] or location[1] > wsi_size[1]: raise ValueError(f"Location is outside of the image: location={location}, image size={wsi_size}") From 1db779c5810b05be6ad7f8dc204f47e41e1e6087 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Fri, 6 May 2022 19:40:48 +0000 Subject: [PATCH 05/30] Update to sliding_sample Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/wsi_datasets.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/monai/data/wsi_datasets.py b/monai/data/wsi_datasets.py index aaca76f06b..63320d11e8 100644 --- a/monai/data/wsi_datasets.py +++ b/monai/data/wsi_datasets.py @@ -184,8 +184,8 @@ def __init__( # Create single sample for each patch (in a sliding window manner) self.data = [] for sample in data: - prepared_sample = self._make_patches(sample) - self.data.extend(prepared_sample) + sliding_samples = self._make_patches(sample) + self.data.extend(sliding_samples) def _make_patches(self, sample): """Define the location for each patch based on sliding-window approach""" From 0fc37c36319cbd1f042673af59a80bd047b76daf Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Fri, 6 May 2022 19:41:16 +0000 Subject: [PATCH 06/30] Implement unittests for SlidingPatchWSIReader Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- 
tests/test_sliding_patch_wsi_dataset.py | 224 ++++++++++++++++++++++++ 1 file changed, 224 insertions(+) create mode 100644 tests/test_sliding_patch_wsi_dataset.py diff --git a/tests/test_sliding_patch_wsi_dataset.py b/tests/test_sliding_patch_wsi_dataset.py new file mode 100644 index 0000000000..4c03598735 --- /dev/null +++ b/tests/test_sliding_patch_wsi_dataset.py @@ -0,0 +1,224 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import unittest +from unittest import skipUnless + +import numpy as np +from numpy.testing import assert_array_equal +from parameterized import parameterized + +from monai.data import SlidingPatchWSIDataset +from monai.utils import optional_import +from tests.utils import download_url_or_skip_test, testing_data_config + +cucim, has_cucim = optional_import("cucim") +has_cucim = has_cucim and hasattr(cucim, "CuImage") +imwrite, has_tiff = optional_import("tifffile", name="imwrite") +_, has_codec = optional_import("imagecodecs") +has_tiff = has_tiff and has_codec + + +FILE_KEY = "wsi_img" +FILE_URL = testing_data_config("images", FILE_KEY, "url") +base_name, extension = os.path.basename(f"{FILE_URL}"), ".tiff" +FILE_PATH = os.path.join(os.path.dirname(__file__), "testing_data", "temp_" + base_name + extension) + +FILE_PATH_SMALL_0 = os.path.join(os.path.dirname(__file__), "testing_data", "temp_wsi_inference_0.tiff") +FILE_PATH_SMALL_1 = os.path.join(os.path.dirname(__file__), "testing_data", 
"temp_wsi_inference_1.tiff") +ARRAY_SMALL_0 = np.random.randint(low=0, high=255, size=(3, 4, 4), dtype=np.uint8) +ARRAY_SMALL_1 = np.random.randint(low=0, high=255, size=(3, 5, 5), dtype=np.uint8) + +TEST_CASE_SMALL_0 = [ + {"data": [{"image": FILE_PATH_SMALL_0, "level": 0}], "size": (2, 2)}, + [ + {"image": ARRAY_SMALL_0[:, :2, :2]}, + {"image": ARRAY_SMALL_0[:, :2, 2:]}, + {"image": ARRAY_SMALL_0[:, 2:, :2]}, + {"image": ARRAY_SMALL_0[:, 2:, 2:]}, + ], +] + +TEST_CASE_SMALL_1 = [ + {"data": [{"image": FILE_PATH_SMALL_0, "level": 0, "size": (2, 2)}]}, + [ + {"image": ARRAY_SMALL_0[:, :2, :2]}, + {"image": ARRAY_SMALL_0[:, :2, 2:]}, + {"image": ARRAY_SMALL_0[:, 2:, :2]}, + {"image": ARRAY_SMALL_0[:, 2:, 2:]}, + ], +] + +TEST_CASE_SMALL_2 = [ + {"data": [{"image": FILE_PATH_SMALL_0, "level": 0}], "size": (2, 2), "overlap": 0.5}, + [ + {"image": ARRAY_SMALL_0[:, 0:2, 0:2]}, + {"image": ARRAY_SMALL_0[:, 0:2, 1:3]}, + {"image": ARRAY_SMALL_0[:, 0:2, 2:4]}, + {"image": ARRAY_SMALL_0[:, 1:3, 0:2]}, + {"image": ARRAY_SMALL_0[:, 1:3, 1:3]}, + {"image": ARRAY_SMALL_0[:, 1:3, 2:4]}, + {"image": ARRAY_SMALL_0[:, 2:4, 0:2]}, + {"image": ARRAY_SMALL_0[:, 2:4, 1:3]}, + {"image": ARRAY_SMALL_0[:, 2:4, 2:4]}, + ], +] + +TEST_CASE_SMALL_3 = [ + {"data": [{"image": FILE_PATH_SMALL_0, "level": 0}], "size": (3, 3), "overlap": 0.50}, + [ + {"image": ARRAY_SMALL_0[:, :3, :3]}, + {"image": ARRAY_SMALL_0[:, :3, 1:]}, + {"image": ARRAY_SMALL_0[:, 1:, :3]}, + {"image": ARRAY_SMALL_0[:, 1:, 1:]}, + ], +] + +TEST_CASE_SMALL_4 = [ + {"data": [{"image": FILE_PATH_SMALL_0, "level": 0}, {"image": FILE_PATH_SMALL_1, "level": 0}], "size": (2, 2)}, + [ + {"image": ARRAY_SMALL_0[:, 0:2, 0:2]}, + {"image": ARRAY_SMALL_0[:, 0:2, 2:4]}, + {"image": ARRAY_SMALL_0[:, 2:4, 0:2]}, + {"image": ARRAY_SMALL_0[:, 2:4, 2:4]}, + {"image": ARRAY_SMALL_1[:, 0:2, 0:2]}, + {"image": ARRAY_SMALL_1[:, 0:2, 2:4]}, + {"image": ARRAY_SMALL_1[:, 2:4, 0:2]}, + {"image": ARRAY_SMALL_1[:, 2:4, 2:4]}, + ], +] + 
+TEST_CASE_SMALL_5 = [ + { + "data": [ + {"image": FILE_PATH_SMALL_0, "level": 0, "size": (2, 2)}, + {"image": FILE_PATH_SMALL_1, "level": 0, "size": (3, 3)}, + ] + }, + [ + {"image": ARRAY_SMALL_0[:, 0:2, 0:2]}, + {"image": ARRAY_SMALL_0[:, 0:2, 2:4]}, + {"image": ARRAY_SMALL_0[:, 2:4, 0:2]}, + {"image": ARRAY_SMALL_0[:, 2:4, 2:4]}, + {"image": ARRAY_SMALL_1[:, 0:3, 0:3]}, + ], +] + +TEST_CASE_SMALL_6 = [ + { + "data": [ + {"image": FILE_PATH_SMALL_0, "level": 1, "size": (1, 1)}, + {"image": FILE_PATH_SMALL_1, "level": 2, "size": (4, 4)}, + ], + "size": (2, 2), + "level": 0, + }, + [ + {"image": ARRAY_SMALL_0[:, 0:2, 0:2]}, + {"image": ARRAY_SMALL_0[:, 0:2, 2:4]}, + {"image": ARRAY_SMALL_0[:, 2:4, 0:2]}, + {"image": ARRAY_SMALL_0[:, 2:4, 2:4]}, + {"image": ARRAY_SMALL_1[:, 0:2, 0:2]}, + {"image": ARRAY_SMALL_1[:, 0:2, 2:4]}, + {"image": ARRAY_SMALL_1[:, 2:4, 0:2]}, + {"image": ARRAY_SMALL_1[:, 2:4, 2:4]}, + ], +] + +TEST_CASE_LARGE_0 = [ + {"data": [{"image": FILE_PATH, "level": 8, "size": (64, 50)}]}, + [ + {"location": (0, 0), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, + {"location": (0, 50), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, + {"location": (0, 100), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, + {"location": (64, 0), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, + {"location": (64, 50), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, + {"location": (64, 100), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, + ], +] + +TEST_CASE_LARGE_1 = [ + { + "data": [ + {"image": FILE_PATH, "level": 8, "size": (64, 50)}, + {"image": FILE_PATH_SMALL_1, "level": 0, "size": (2, 2)}, + ] + }, + [ + {"location": (0, 0), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, + {"location": (0, 50), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, + {"location": (0, 100), "size": (64, 50), "level": 8, 
"ratios": (32914 / 128, 46000 / 179)}, + {"location": (64, 0), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, + {"location": (64, 50), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, + {"location": (64, 100), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, + {"location": (0, 0), "size": (2, 2), "level": 0, "ratios": (1.0, 1.0)}, + {"location": (0, 2), "size": (2, 2), "level": 0, "ratios": (1.0, 1.0)}, + {"location": (2, 0), "size": (2, 2), "level": 0, "ratios": (1.0, 1.0)}, + {"location": (2, 2), "size": (2, 2), "level": 0, "ratios": (1.0, 1.0)}, + ], +] + + +@skipUnless(has_cucim or has_tiff, "Requires cucim, openslide, or tifffile!") +def setUpModule(): # noqa: N802 + for info in [(ARRAY_SMALL_0, FILE_PATH_SMALL_0), (ARRAY_SMALL_1, FILE_PATH_SMALL_1)]: + array = info[0].transpose([1, 2, 0]) + imwrite(info[1], array, shape=array.shape, photometric="rgb") + hash_type = testing_data_config("images", FILE_KEY, "hash_type") + hash_val = testing_data_config("images", FILE_KEY, "hash_val") + download_url_or_skip_test(FILE_URL, FILE_PATH, hash_type=hash_type, hash_val=hash_val) + + +class SlidingPatchWSIDatasetTests: + class Tests(unittest.TestCase): + backend = None + + @parameterized.expand( + [ + TEST_CASE_SMALL_0, + TEST_CASE_SMALL_1, + TEST_CASE_SMALL_2, + TEST_CASE_SMALL_3, + TEST_CASE_SMALL_4, + TEST_CASE_SMALL_5, + TEST_CASE_SMALL_6, + ] + ) + def test_read_patches(self, input_parameters, expected): + dataset = SlidingPatchWSIDataset(reader=self.backend, **input_parameters) + self.assertEqual(len(dataset), len(expected)) + for i, sample in enumerate(dataset): + self.assertTupleEqual(sample["image"].shape, expected[i]["image"].shape) + + @parameterized.expand([TEST_CASE_LARGE_0, TEST_CASE_LARGE_1]) + def test_read_patches_large(self, input_parameters, expected): + dataset = SlidingPatchWSIDataset(reader=self.backend, **input_parameters) + self.assertEqual(len(dataset), len(expected)) + for i, sample 
in enumerate(dataset): + self.assertEqual(sample["metadata"]["level"], expected[i]["level"]) + self.assertTupleEqual(sample["metadata"]["size"], expected[i]["size"]) + expected_locations = tuple( + int(expected[i]["location"][j] * expected[i]["ratios"][j]) + for j in range(len(expected[i]["location"])) + ) + self.assertTupleEqual(sample["metadata"]["location"], expected_locations) + + +@skipUnless(has_cucim, "Requires cucim") +class TestSlidingPatchWSIDatasetCuCIM(SlidingPatchWSIDatasetTests.Tests): + @classmethod + def setUpClass(cls): + cls.backend = "cucim" + + +if __name__ == "__main__": + unittest.main() From 577b09753596fef7180ca7c61d3641ee496cd1c2 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Fri, 6 May 2022 19:56:37 +0000 Subject: [PATCH 07/30] Remove unused import Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- tests/test_sliding_patch_wsi_dataset.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_sliding_patch_wsi_dataset.py b/tests/test_sliding_patch_wsi_dataset.py index 0c8cedd365..db2d032e4c 100644 --- a/tests/test_sliding_patch_wsi_dataset.py +++ b/tests/test_sliding_patch_wsi_dataset.py @@ -14,7 +14,6 @@ from unittest import skipUnless import numpy as np -from numpy.testing import assert_array_equal from parameterized import parameterized from monai.data import SlidingPatchWSIDataset From 9cd07bb13006332aa7b69f5f38ad0082a60f1f6b Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Mon, 9 May 2022 16:51:45 +0000 Subject: [PATCH 08/30] Minor updates and add openslide tests Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/wsi_datasets.py | 15 ++++--- tests/test_sliding_patch_wsi_dataset.py | 58 +++++++++++++++---------- 2 files changed, 46 insertions(+), 27 deletions(-) diff --git a/monai/data/wsi_datasets.py b/monai/data/wsi_datasets.py index 63320d11e8..9fa2d783e5 100644 --- a/monai/data/wsi_datasets.py +++ 
b/monai/data/wsi_datasets.py @@ -184,10 +184,10 @@ def __init__( # Create single sample for each patch (in a sliding window manner) self.data = [] for sample in data: - sliding_samples = self._make_patches(sample) + sliding_samples = self._evaluate_patch_coordinates(sample) self.data.extend(sliding_samples) - def _make_patches(self, sample): + def _evaluate_patch_coordinates(self, sample): """Define the location for each patch based on sliding-window approach""" wsi_obj = self._get_wsi_object(sample) wsi_size = self.wsi_reader.get_size(wsi_obj, 0) @@ -199,10 +199,15 @@ def _make_patches(self, sample): wsi_size_at_level = self.wsi_reader.get_size(wsi_obj, level) ratio = [wsi_size[i] / wsi_size_at_level[i] for i in range(len(patch_size_level))] - patch_size = [int(patch_size_level[i] * ratio[i]) for i in range(len(patch_size_level))] - steps = [int(patch_size[i] * (1.0 - self.overlap)) for i in range(len(patch_size_level))] + patch_size = [(patch_size_level[i] * ratio[i]) for i in range(len(patch_size_level))] + steps = [round(patch_size[i] * (1.0 - self.overlap)) for i in range(len(patch_size_level))] locations = list( - product(*[list(range(0, wsi_size[i] - patch_size[i] + 1, steps[i])) for i in range(len(patch_size_level))]) + product( + *[ + list(range(0, wsi_size[i] - round(patch_size[i]) + 1, steps[i])) + for i in range(len(patch_size_level)) + ] + ) ) sample["size"] = patch_size_level sample["level"] = level diff --git a/tests/test_sliding_patch_wsi_dataset.py b/tests/test_sliding_patch_wsi_dataset.py index db2d032e4c..fc6e45f845 100644 --- a/tests/test_sliding_patch_wsi_dataset.py +++ b/tests/test_sliding_patch_wsi_dataset.py @@ -22,6 +22,7 @@ cucim, has_cucim = optional_import("cucim") has_cucim = has_cucim and hasattr(cucim, "CuImage") +openslide, has_osl = optional_import("openslide") imwrite, has_tiff = optional_import("tifffile", name="imwrite") _, has_codec = optional_import("imagecodecs") has_tiff = has_tiff and has_codec @@ -73,7 +74,7 @@ ] 
TEST_CASE_SMALL_3 = [ - {"data": [{"image": FILE_PATH_SMALL_0, "level": 0}], "size": (3, 3), "overlap": 0.50}, + {"data": [{"image": FILE_PATH_SMALL_0, "level": 0}], "size": (3, 3), "overlap": 2.0 / 3.0}, [ {"image": ARRAY_SMALL_0[:, :3, :3]}, {"image": ARRAY_SMALL_0[:, :3, 1:]}, @@ -136,12 +137,12 @@ TEST_CASE_LARGE_0 = [ {"data": [{"image": FILE_PATH, "level": 8, "size": (64, 50)}]}, [ - {"location": (0, 0), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, - {"location": (0, 50), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, - {"location": (0, 100), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, - {"location": (64, 0), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, - {"location": (64, 50), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, - {"location": (64, 100), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, + {"step_loc": (0, 0), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, + {"step_loc": (0, 1), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, + {"step_loc": (0, 2), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, + {"step_loc": (1, 0), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, + {"step_loc": (1, 1), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, + {"step_loc": (1, 2), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, ], ] @@ -149,20 +150,22 @@ { "data": [ {"image": FILE_PATH, "level": 8, "size": (64, 50)}, - {"image": FILE_PATH_SMALL_1, "level": 0, "size": (2, 2)}, + {"image": FILE_PATH, "level": 7, "size": (125, 110)}, ] }, [ - {"location": (0, 0), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, - {"location": (0, 50), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, - {"location": (0, 100), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, - 
{"location": (64, 0), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, - {"location": (64, 50), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, - {"location": (64, 100), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, - {"location": (0, 0), "size": (2, 2), "level": 0, "ratios": (1.0, 1.0)}, - {"location": (0, 2), "size": (2, 2), "level": 0, "ratios": (1.0, 1.0)}, - {"location": (2, 0), "size": (2, 2), "level": 0, "ratios": (1.0, 1.0)}, - {"location": (2, 2), "size": (2, 2), "level": 0, "ratios": (1.0, 1.0)}, + {"step_loc": (0, 0), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, + {"step_loc": (0, 1), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, + {"step_loc": (0, 2), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, + {"step_loc": (1, 0), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, + {"step_loc": (1, 1), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, + {"step_loc": (1, 2), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, + {"step_loc": (0, 0), "size": (125, 110), "level": 7, "ratios": (32914 / 257, 46000 / 359)}, + {"step_loc": (0, 1), "size": (125, 110), "level": 7, "ratios": (32914 / 257, 46000 / 359)}, + {"step_loc": (0, 2), "size": (125, 110), "level": 7, "ratios": (32914 / 257, 46000 / 359)}, + {"step_loc": (1, 0), "size": (125, 110), "level": 7, "ratios": (32914 / 257, 46000 / 359)}, + {"step_loc": (1, 1), "size": (125, 110), "level": 7, "ratios": (32914 / 257, 46000 / 359)}, + {"step_loc": (1, 2), "size": (125, 110), "level": 7, "ratios": (32914 / 257, 46000 / 359)}, ], ] @@ -193,6 +196,8 @@ class Tests(unittest.TestCase): ] ) def test_read_patches(self, input_parameters, expected): + if self.backend == "openslide": + return dataset = SlidingPatchWSIDataset(reader=self.backend, **input_parameters) self.assertEqual(len(dataset), len(expected)) for i, sample in 
enumerate(dataset): @@ -205,11 +210,13 @@ def test_read_patches_large(self, input_parameters, expected): for i, sample in enumerate(dataset): self.assertEqual(sample["metadata"]["patch"]["level"], expected[i]["level"]) self.assertTupleEqual(sample["metadata"]["patch"]["size"], expected[i]["size"]) - expected_locations = tuple( - int(expected[i]["location"][j] * expected[i]["ratios"][j]) - for j in range(len(expected[i]["location"])) + steps = [ + round(expected[i]["ratios"][j] * expected[i]["size"][j]) for j in range(len(expected[i]["size"])) + ] + expected_location = tuple( + expected[i]["step_loc"][j] * steps[j] for j in range(len(expected[i]["size"])) ) - self.assertTupleEqual(sample["metadata"]["patch"]["location"], expected_locations) + self.assertTupleEqual(sample["metadata"]["patch"]["location"], expected_location) @skipUnless(has_cucim, "Requires cucim") @@ -219,5 +226,12 @@ def setUpClass(cls): cls.backend = "cucim" +@skipUnless(has_osl, "Requires openslide") +class TestSlidingPatchWSIDatasetOpenSlide(SlidingPatchWSIDatasetTests.Tests): + @classmethod + def setUpClass(cls): + cls.backend = "openslide" + + if __name__ == "__main__": unittest.main() From f206ecd5bdd8c14838c1cff561222211d4407dd0 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Mon, 16 May 2022 16:48:11 +0000 Subject: [PATCH 09/30] Update docstring Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/wsi_datasets.py | 1 + 1 file changed, 1 insertion(+) diff --git a/monai/data/wsi_datasets.py b/monai/data/wsi_datasets.py index 07ff0537b2..fa52cc61b6 100644 --- a/monai/data/wsi_datasets.py +++ b/monai/data/wsi_datasets.py @@ -149,6 +149,7 @@ class SlidingPatchWSIDataset(PatchWSIDataset): data: the list of input samples including image, location, and label (see the note below for more details). size: the size of patch to be extracted from the whole slide image. level: the level at which the patches to be extracted (default to 0). 
+ overlap: the relative amount of overlap (between 0 and 1) for patches in each direction. Defaults to 0. transform: transforms to be executed on input data. reader: the module to be used for loading whole slide imaging. Defaults to cuCIM. If `reader` is From cbd477a8d9657dba6e7805e40c72787c788533ba Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Tue, 17 May 2022 01:03:55 +0000 Subject: [PATCH 10/30] Implement iter_wsi_patch_location Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/utils.py | 42 +++++++++++++++++++ monai/data/wsi_datasets.py | 25 ++++-------- monai/data/wsi_reader.py | 47 +++++++++++++++++++++ tests/test_sliding_patch_wsi_dataset.py | 54 ++++++++++++------------- 4 files changed, 121 insertions(+), 47 deletions(-) diff --git a/monai/data/utils.py b/monai/data/utils.py index 2bd7b49731..b31609e81a 100644 --- a/monai/data/utils.py +++ b/monai/data/utils.py @@ -149,6 +149,48 @@ def iter_patch_slices( yield tuple(slice(s, s + p) for s, p in zip(position[::-1], patch_size_)) +def iter_wsi_patch_location( + image_size: Sequence[int], + patch_size: Union[Sequence[int], int], + downsample: float = 1.0, + overlap: float = 0.0, + start_pos: Sequence[int] = (), + padded: bool = False, +): + """ + Yield successive tuple of location defining a patch of size `patch_size` from an image of size `image_size`, and if + it is downsampled by `downsample` ratio with the relative overalpping amount of `overlap`. + The iteration starts from position `start_pos` in the whole slide image, or starting at the origin if this isn't + provided. 
+ + Args: + image_size: dimensions of image + downsample: the downsample ratio the + patch_size: size of patches to generate slices for, 0 or None selects whole dimension + downsample: the relative amount of overlap for patches + start_pos: starting position in the image, default is 0 for each dimension + padded: if the image is padded so the patches can go beyond the borders. Defaults to False. + Note that the padding depends on the functionality of the underlying whole slide imaging reader, + and is not guranteed for all images. + + Yields: + Tuple of patch location + """ + ndim = len(image_size) + patch_size = get_valid_patch_size(image_size, patch_size) + start_pos = ensure_tuple_size(start_pos, ndim) + + # Get the patch size at level=0 + patch_size_0 = [p * downsample for p in patch_size] + # Calculate steps, which depends on the amount of overlap + steps = [round(p * (1.0 - overlap)) for p in patch_size_0] + # Calculate the last permitted location (depending on the padding) + end_pos = image_size if padded else [image_size[i] - round(patch_size_0[i]) + 1 for i in range(ndim)] + # Evaluate the starting locations for patches + ranges = tuple(starmap(range, zip(start_pos, end_pos, steps))) + return product(*ranges) + + def dense_patch_slices( image_size: Sequence[int], patch_size: Sequence[int], scan_interval: Sequence[int] ) -> List[Tuple[slice, ...]]: diff --git a/monai/data/wsi_datasets.py b/monai/data/wsi_datasets.py index fa52cc61b6..e314c0180c 100644 --- a/monai/data/wsi_datasets.py +++ b/monai/data/wsi_datasets.py @@ -17,6 +17,7 @@ from monai.data import Dataset from monai.data.wsi_reader import BaseWSIReader, WSIReader +from monai.data.utils import iter_wsi_patch_location from monai.transforms import apply_transform from monai.utils import ensure_tuple_rep @@ -193,26 +194,14 @@ def __init__( def _evaluate_patch_coordinates(self, sample): """Define the location for each patch based on sliding-window approach""" wsi_obj = self._get_wsi_object(sample) + 
patch_size = self._get_size(sample) + level = self._get_level(sample) + wsi_size = self.wsi_reader.get_size(wsi_obj, 0) + downsample = self.wsi_reader.get_downsample(wsi_obj, level) - patch_size_level = self._get_size(sample) - level = self._get_level(sample) - ratio = [1.0] * len(patch_size_level) - if level > 0: - wsi_size_at_level = self.wsi_reader.get_size(wsi_obj, level) - ratio = [wsi_size[i] / wsi_size_at_level[i] for i in range(len(patch_size_level))] - - patch_size = [(patch_size_level[i] * ratio[i]) for i in range(len(patch_size_level))] - steps = [round(patch_size[i] * (1.0 - self.overlap)) for i in range(len(patch_size_level))] - locations = list( - product( - *[ - list(range(0, wsi_size[i] - round(patch_size[i]) + 1, steps[i])) - for i in range(len(patch_size_level)) - ] - ) - ) - sample["size"] = patch_size_level + locations = list(iter_wsi_patch_location(wsi_size, patch_size, downsample, self.overlap)) + sample["size"] = patch_size sample["level"] = level n_patches = len(locations) return [{**sample, "location": locations[i], "patch_num": i, "n_patches": n_patches} for i in range(n_patches)] diff --git a/monai/data/wsi_reader.py b/monai/data/wsi_reader.py index b29ac3848f..4c2bfc5000 100644 --- a/monai/data/wsi_reader.py +++ b/monai/data/wsi_reader.py @@ -85,6 +85,18 @@ def get_level_count(self, wsi) -> int: """ raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") + @abstractmethod + def get_downsample(self, wsi, level: int) -> float: + """ + Returns the downsample ratio of the whole slide image at a given level. 
+ + Args: + wsi: a whole slide image object loaded from a file + level: the level number where the size is calculated + + """ + raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") + @abstractmethod def get_file_path(self, wsi) -> str: """Return the file path for the WSI object""" @@ -290,6 +302,17 @@ def get_size(self, wsi, level: int) -> Tuple[int, int]: """ return self.reader.get_size(wsi, level) + def get_downsample(self, wsi, level: int) -> float: + """ + Returns the downsample ratio of the whole slide image at a given level. + + Args: + wsi: a whole slide image object loaded from a file + level: the level number where the size is calculated + + """ + return self.reader.get_downsample(wsi, level) + def get_file_path(self, wsi) -> str: """Return the file path for the WSI object""" return self.reader.get_file_path(wsi) @@ -369,6 +392,18 @@ def get_size(wsi, level: int) -> Tuple[int, int]: """ return (wsi.resolutions["level_dimensions"][level][1], wsi.resolutions["level_dimensions"][level][0]) + @staticmethod + def get_downsample(wsi, level: int) -> float: + """ + Returns the downsample ratio of the whole slide image at a given level. + + Args: + wsi: a whole slide image object loaded from a file + level: the level number where the size is calculated + + """ + return wsi.resolutions["level_downsamples"][level] # type: ignore + def get_file_path(self, wsi) -> str: """Return the file path for the WSI object""" return str(abspath(wsi.path)) @@ -475,6 +510,18 @@ def get_size(wsi, level: int) -> Tuple[int, int]: """ return (wsi.level_dimensions[level][1], wsi.level_dimensions[level][0]) + @staticmethod + def get_downsample(wsi, level: int) -> float: + """ + Returns the downsample ratio of the whole slide image at a given level. 
+ + Args: + wsi: a whole slide image object loaded from a file + level: the level number where the size is calculated + + """ + return wsi.level_downsamples[level] # type: ignore + def get_file_path(self, wsi) -> str: """Return the file path for the WSI object""" return str(abspath(wsi._filename)) diff --git a/tests/test_sliding_patch_wsi_dataset.py b/tests/test_sliding_patch_wsi_dataset.py index fc6e45f845..7094acde8f 100644 --- a/tests/test_sliding_patch_wsi_dataset.py +++ b/tests/test_sliding_patch_wsi_dataset.py @@ -137,12 +137,12 @@ TEST_CASE_LARGE_0 = [ {"data": [{"image": FILE_PATH, "level": 8, "size": (64, 50)}]}, [ - {"step_loc": (0, 0), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, - {"step_loc": (0, 1), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, - {"step_loc": (0, 2), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, - {"step_loc": (1, 0), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, - {"step_loc": (1, 1), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, - {"step_loc": (1, 2), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, + {"step_loc": (0, 0), "size": (64, 50), "level": 8, "ratio": 257.06195068359375}, + {"step_loc": (0, 1), "size": (64, 50), "level": 8, "ratio": 257.06195068359375}, + {"step_loc": (0, 2), "size": (64, 50), "level": 8, "ratio": 257.06195068359375}, + {"step_loc": (1, 0), "size": (64, 50), "level": 8, "ratio": 257.06195068359375}, + {"step_loc": (1, 1), "size": (64, 50), "level": 8, "ratio": 257.06195068359375}, + {"step_loc": (1, 2), "size": (64, 50), "level": 8, "ratio": 257.06195068359375}, ], ] @@ -154,18 +154,18 @@ ] }, [ - {"step_loc": (0, 0), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, - {"step_loc": (0, 1), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, - {"step_loc": (0, 2), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, - 
{"step_loc": (1, 0), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, - {"step_loc": (1, 1), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, - {"step_loc": (1, 2), "size": (64, 50), "level": 8, "ratios": (32914 / 128, 46000 / 179)}, - {"step_loc": (0, 0), "size": (125, 110), "level": 7, "ratios": (32914 / 257, 46000 / 359)}, - {"step_loc": (0, 1), "size": (125, 110), "level": 7, "ratios": (32914 / 257, 46000 / 359)}, - {"step_loc": (0, 2), "size": (125, 110), "level": 7, "ratios": (32914 / 257, 46000 / 359)}, - {"step_loc": (1, 0), "size": (125, 110), "level": 7, "ratios": (32914 / 257, 46000 / 359)}, - {"step_loc": (1, 1), "size": (125, 110), "level": 7, "ratios": (32914 / 257, 46000 / 359)}, - {"step_loc": (1, 2), "size": (125, 110), "level": 7, "ratios": (32914 / 257, 46000 / 359)}, + {"step_loc": (0, 0), "size": (64, 50), "level": 8, "ratio": 257.06195068359375}, + {"step_loc": (0, 1), "size": (64, 50), "level": 8, "ratio": 257.06195068359375}, + {"step_loc": (0, 2), "size": (64, 50), "level": 8, "ratio": 257.06195068359375}, + {"step_loc": (1, 0), "size": (64, 50), "level": 8, "ratio": 257.06195068359375}, + {"step_loc": (1, 1), "size": (64, 50), "level": 8, "ratio": 257.06195068359375}, + {"step_loc": (1, 2), "size": (64, 50), "level": 8, "ratio": 257.06195068359375}, + {"step_loc": (0, 0), "size": (125, 110), "level": 7, "ratio": 128.10186767578125}, + {"step_loc": (0, 1), "size": (125, 110), "level": 7, "ratio": 128.10186767578125}, + {"step_loc": (0, 2), "size": (125, 110), "level": 7, "ratio": 128.10186767578125}, + {"step_loc": (1, 0), "size": (125, 110), "level": 7, "ratio": 128.10186767578125}, + {"step_loc": (1, 1), "size": (125, 110), "level": 7, "ratio": 128.10186767578125}, + {"step_loc": (1, 2), "size": (125, 110), "level": 7, "ratio": 128.10186767578125}, ], ] @@ -210,12 +210,8 @@ def test_read_patches_large(self, input_parameters, expected): for i, sample in enumerate(dataset): 
self.assertEqual(sample["metadata"]["patch"]["level"], expected[i]["level"]) self.assertTupleEqual(sample["metadata"]["patch"]["size"], expected[i]["size"]) - steps = [ - round(expected[i]["ratios"][j] * expected[i]["size"][j]) for j in range(len(expected[i]["size"])) - ] - expected_location = tuple( - expected[i]["step_loc"][j] * steps[j] for j in range(len(expected[i]["size"])) - ) + steps = [round(expected[i]["ratio"] * s) for s in expected[i]["size"]] + expected_location = tuple(expected[i]["step_loc"][j] * steps[j] for j in range(len(steps))) self.assertTupleEqual(sample["metadata"]["patch"]["location"], expected_location) @@ -226,11 +222,11 @@ def setUpClass(cls): cls.backend = "cucim" -@skipUnless(has_osl, "Requires openslide") -class TestSlidingPatchWSIDatasetOpenSlide(SlidingPatchWSIDatasetTests.Tests): - @classmethod - def setUpClass(cls): - cls.backend = "openslide" +# @skipUnless(has_osl, "Requires openslide") +# class TestSlidingPatchWSIDatasetOpenSlide(SlidingPatchWSIDatasetTests.Tests): +# @classmethod +# def setUpClass(cls): +# cls.backend = "openslide" if __name__ == "__main__": From aba82b38375d37aad97a5c6b660318f6e8b8ce7f Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Tue, 17 May 2022 01:08:15 +0000 Subject: [PATCH 11/30] Update docstring Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/monai/data/utils.py b/monai/data/utils.py index b31609e81a..e6c2d89cea 100644 --- a/monai/data/utils.py +++ b/monai/data/utils.py @@ -158,8 +158,8 @@ def iter_wsi_patch_location( padded: bool = False, ): """ - Yield successive tuple of location defining a patch of size `patch_size` from an image of size `image_size`, and if - it is downsampled by `downsample` ratio with the relative overalpping amount of `overlap`. 
+ Yield successive tuple of locations defining a patch of size `patch_size` from an image of size `image_size`, + with the relative overalpping of `overlap`. The patch is in the resolution level related to `downsample` ratio. The iteration starts from position `start_pos` in the whole slide image, or starting at the origin if this isn't provided. From 5171287249a83339d7ef8abaccfe965b34a31acf Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Tue, 17 May 2022 13:21:08 +0000 Subject: [PATCH 12/30] Uncomment openslide Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- tests/test_sliding_patch_wsi_dataset.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_sliding_patch_wsi_dataset.py b/tests/test_sliding_patch_wsi_dataset.py index 7094acde8f..0ab5bfca46 100644 --- a/tests/test_sliding_patch_wsi_dataset.py +++ b/tests/test_sliding_patch_wsi_dataset.py @@ -222,11 +222,11 @@ def setUpClass(cls): cls.backend = "cucim" -# @skipUnless(has_osl, "Requires openslide") -# class TestSlidingPatchWSIDatasetOpenSlide(SlidingPatchWSIDatasetTests.Tests): -# @classmethod -# def setUpClass(cls): -# cls.backend = "openslide" +@skipUnless(has_osl, "Requires openslide") +class TestSlidingPatchWSIDatasetOpenSlide(SlidingPatchWSIDatasetTests.Tests): + @classmethod + def setUpClass(cls): + cls.backend = "openslide" if __name__ == "__main__": From 71e70c32b6b235d498fff6687c969a999ad94137 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Tue, 17 May 2022 13:41:12 +0000 Subject: [PATCH 13/30] Reorder imports Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/wsi_datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monai/data/wsi_datasets.py b/monai/data/wsi_datasets.py index e314c0180c..8997fd35a3 100644 --- a/monai/data/wsi_datasets.py +++ b/monai/data/wsi_datasets.py @@ -16,8 +16,8 @@ import numpy as np from 
monai.data import Dataset -from monai.data.wsi_reader import BaseWSIReader, WSIReader from monai.data.utils import iter_wsi_patch_location +from monai.data.wsi_reader import BaseWSIReader, WSIReader from monai.transforms import apply_transform from monai.utils import ensure_tuple_rep From 2e4054517c08a8940f46075c633490ef217362c0 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Tue, 17 May 2022 14:10:50 +0000 Subject: [PATCH 14/30] Add overlap and more updates Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/utils.py | 106 +++++++++++++++++++++---------------- monai/data/wsi_datasets.py | 10 +++- 2 files changed, 68 insertions(+), 48 deletions(-) diff --git a/monai/data/utils.py b/monai/data/utils.py index e6c2d89cea..680b39ad51 100644 --- a/monai/data/utils.py +++ b/monai/data/utils.py @@ -120,7 +120,10 @@ def get_random_patch( def iter_patch_slices( - dims: Sequence[int], patch_size: Union[Sequence[int], int], start_pos: Sequence[int] = () + image_size: Sequence[int], + patch_size: Union[Sequence[int], int], + start_pos: Sequence[int] = (), + overlap: float = 0.0, ) -> Generator[Tuple[slice, ...], None, None]: """ Yield successive tuples of slices defining patches of size `patch_size` from an array of dimensions `dims`. The @@ -128,69 +131,34 @@ def iter_patch_slices( patch is chosen in a contiguous grid using a first dimension as least significant ordering. Args: - dims: dimensions of array to iterate over + image_size: dimensions of array to iterate over patch_size: size of patches to generate slices for, 0 or None selects whole dimension start_pos: starting position in the array, default is 0 for each dimension + overlap: the amount of overlap between patches, which is between 0.0 and 1.0. Defaults to 0.0. 
Yields: Tuples of slice objects defining each patch """ # ensure patchSize and startPos are the right length - ndim = len(dims) - patch_size_ = get_valid_patch_size(dims, patch_size) + ndim = len(image_size) + patch_size_ = get_valid_patch_size(image_size, patch_size) start_pos = ensure_tuple_size(start_pos, ndim) + # calculate steps, which depends on the amount of overlap + steps = tuple(round(p * (1.0 - overlap)) for p in patch_size_) + + # calculate the last starting positions + end_pos = tuple(s - p + 1 for s, p in zip(image_size, patch_size_)) + # collect the ranges to step over each dimension - ranges = tuple(starmap(range, zip(start_pos, dims, patch_size_))) + ranges = tuple(starmap(range, zip(start_pos, end_pos, steps))) # choose patches by applying product to the ranges for position in product(*ranges[::-1]): # reverse ranges order to iterate in index order yield tuple(slice(s, s + p) for s, p in zip(position[::-1], patch_size_)) -def iter_wsi_patch_location( - image_size: Sequence[int], - patch_size: Union[Sequence[int], int], - downsample: float = 1.0, - overlap: float = 0.0, - start_pos: Sequence[int] = (), - padded: bool = False, -): - """ - Yield successive tuple of locations defining a patch of size `patch_size` from an image of size `image_size`, - with the relative overalpping of `overlap`. The patch is in the resolution level related to `downsample` ratio. - The iteration starts from position `start_pos` in the whole slide image, or starting at the origin if this isn't - provided. - - Args: - image_size: dimensions of image - downsample: the downsample ratio the - patch_size: size of patches to generate slices for, 0 or None selects whole dimension - downsample: the relative amount of overlap for patches - start_pos: starting position in the image, default is 0 for each dimension - padded: if the image is padded so the patches can go beyond the borders. Defaults to False. 
- Note that the padding depends on the functionality of the underlying whole slide imaging reader, - and is not guranteed for all images. - - Yields: - Tuple of patch location - """ - ndim = len(image_size) - patch_size = get_valid_patch_size(image_size, patch_size) - start_pos = ensure_tuple_size(start_pos, ndim) - - # Get the patch size at level=0 - patch_size_0 = [p * downsample for p in patch_size] - # Calculate steps, which depends on the amount of overlap - steps = [round(p * (1.0 - overlap)) for p in patch_size_0] - # Calculate the last permitted location (depending on the padding) - end_pos = image_size if padded else [image_size[i] - round(patch_size_0[i]) + 1 for i in range(ndim)] - # Evaluate the starting locations for patches - ranges = tuple(starmap(range, zip(start_pos, end_pos, steps))) - return product(*ranges) - - def dense_patch_slices( image_size: Sequence[int], patch_size: Sequence[int], scan_interval: Sequence[int] ) -> List[Tuple[slice, ...]]: @@ -293,6 +261,50 @@ def iter_patch( arr[...] = arrpad[slices] +def iter_wsi_patch_location( + image_size: Sequence[int], + patch_size: Union[Sequence[int], int], + start_pos: Sequence[int] = (), + overlap: float = 0.0, + downsample: float = 1.0, + padded: bool = False, +): + """ + Yield successive tuple of locations defining a patch of size `patch_size` from an image of size `image_size`, + with the relative overalpping of `overlap`. The patch is in the resolution level related to `downsample` ratio. + The iteration starts from position `start_pos` in the whole slide image, or starting at the origin if this isn't + provided. + + Args: + image_size: dimensions of image + patch_size: size of patches to generate slices for, 0 or None selects whole dimension + start_pos: starting position in the image, default is 0 for each dimension + overlap: the amount of overlap between patches, which is between 0.0 and 1.0. Defaults to 0.0. 
+ downsample: the downsample ratio + padded: if the image is padded so the patches can go beyond the borders. Defaults to False. + Note that the padding depends on the functionality of the underlying whole slide imaging reader, + and is not guranteed for all images. + + Yields: + Tuple of patch location + """ + ndim = len(image_size) + patch_size = get_valid_patch_size(image_size, patch_size) + start_pos = ensure_tuple_size(start_pos, ndim) + + # get the patch size at level=0 + patch_size_ = tuple(p * downsample for p in patch_size) + + # calculate steps, which depends on the amount of overlap + steps = tuple(round(p * (1.0 - overlap)) for p in patch_size_) + + # calculate the last starting location (depending on the padding) + end_pos = image_size if padded else tuple(s - round(p) + 1 for s, p in zip(image_size, patch_size_)) + + # evaluate the starting locations for patches + return product(*tuple(starmap(range, zip(start_pos, end_pos, steps)))) + + def get_valid_patch_size(image_size: Sequence[int], patch_size: Union[Sequence[int], int]) -> Tuple[int, ...]: """ Given an image of dimensions `image_size`, return a patch size tuple taking the dimension from `patch_size` if this is diff --git a/monai/data/wsi_datasets.py b/monai/data/wsi_datasets.py index 8997fd35a3..81c6d864b9 100644 --- a/monai/data/wsi_datasets.py +++ b/monai/data/wsi_datasets.py @@ -200,7 +200,15 @@ def _evaluate_patch_coordinates(self, sample): wsi_size = self.wsi_reader.get_size(wsi_obj, 0) downsample = self.wsi_reader.get_downsample(wsi_obj, level) - locations = list(iter_wsi_patch_location(wsi_size, patch_size, downsample, self.overlap)) + locations = list( + iter_wsi_patch_location( + image_size=wsi_size, + patch_size=patch_size, + start_pos=(0, 0), + downsample=downsample, + overlap=self.overlap, + ) + ) sample["size"] = patch_size sample["level"] = level n_patches = len(locations) From b733ec3f5ddc35456186e0a0c861e27eefbe160b Mon Sep 17 00:00:00 2001 From: Behrooz 
<3968947+drbeh@users.noreply.github.com> Date: Tue, 17 May 2022 14:11:54 +0000 Subject: [PATCH 15/30] Remove product Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/wsi_datasets.py | 1 - 1 file changed, 1 deletion(-) diff --git a/monai/data/wsi_datasets.py b/monai/data/wsi_datasets.py index 81c6d864b9..c6cf0d5704 100644 --- a/monai/data/wsi_datasets.py +++ b/monai/data/wsi_datasets.py @@ -10,7 +10,6 @@ # limitations under the License. import inspect -from itertools import product from typing import Callable, Dict, Optional, Sequence, Tuple, Union import numpy as np From 1aefdd117b7124199891893270723f89c3cd1efb Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Tue, 17 May 2022 14:20:55 +0000 Subject: [PATCH 16/30] Add overlap to iter_patch Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/monai/data/utils.py b/monai/data/utils.py index 680b39ad51..54cf16ec47 100644 --- a/monai/data/utils.py +++ b/monai/data/utils.py @@ -203,6 +203,7 @@ def iter_patch( arr: np.ndarray, patch_size: Union[Sequence[int], int] = 0, start_pos: Sequence[int] = (), + overlap: float = 0.0, copy_back: bool = True, mode: Union[NumpyPadMode, str] = NumpyPadMode.WRAP, **pad_opts: Dict, @@ -250,7 +251,7 @@ def iter_patch( # patches which are only in the padded regions iter_size = tuple(s + p for s, p in zip(arr.shape, patch_size_)) - for slices in iter_patch_slices(iter_size, patch_size_, start_pos_padded): + for slices in iter_patch_slices(iter_size, patch_size_, start_pos_padded, overlap): # compensate original image padding coords_no_pad = tuple((coord.start - p, coord.stop - p) for coord, p in zip(slices, patch_size_)) yield arrpad[slices], np.asarray(coords_no_pad) # data and coords (in numpy; works with torch loader) From c57bec4c4f30da7bb2a175405345fa1d253c2360 Mon Sep 17 00:00:00 2001 From: Behrooz 
<3968947+drbeh@users.noreply.github.com> Date: Tue, 17 May 2022 14:21:21 +0000 Subject: [PATCH 17/30] Update docstring Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/monai/data/utils.py b/monai/data/utils.py index 54cf16ec47..85eda21902 100644 --- a/monai/data/utils.py +++ b/monai/data/utils.py @@ -217,6 +217,7 @@ def iter_patch( arr: array to iterate over patch_size: size of patches to generate slices for, 0 or None selects whole dimension start_pos: starting position in the array, default is 0 for each dimension + overlap: the amount of overlap between patches, which is between 0.0 and 1.0. Defaults to 0.0. copy_back: if True data from the yielded patches is copied back to `arr` once the generator completes mode: {``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} From f2a3a2213db75b5902d0e11fd4c6a7858ecd49ff Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Wed, 18 May 2022 13:21:06 +0000 Subject: [PATCH 18/30] fix an arg Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/grid_dataset.py | 1 + 1 file changed, 1 insertion(+) diff --git a/monai/data/grid_dataset.py b/monai/data/grid_dataset.py index 33497b5a68..2e389d9e0b 100644 --- a/monai/data/grid_dataset.py +++ b/monai/data/grid_dataset.py @@ -73,6 +73,7 @@ def __call__(self, array: np.ndarray): array, patch_size=self.patch_size, # type: ignore start_pos=self.start_pos, + overlap=0.0, copy_back=False, mode=self.mode, **self.pad_opts, From cc043d42ccbb41fc0b9aeebd003032bd3dfba13b Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Mon, 23 May 2022 13:04:54 +0000 Subject: [PATCH 19/30] Include padded patches Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/utils.py | 5 +---- 1 file changed, 
1 insertion(+), 4 deletions(-) diff --git a/monai/data/utils.py b/monai/data/utils.py index d50d08ad95..d42aac9708 100644 --- a/monai/data/utils.py +++ b/monai/data/utils.py @@ -151,11 +151,8 @@ def iter_patch_slices( # calculate steps, which depends on the amount of overlap steps = tuple(round(p * (1.0 - overlap)) for p in patch_size_) - # calculate the last starting positions - end_pos = tuple(s - p + 1 for s, p in zip(image_size, patch_size_)) - # collect the ranges to step over each dimension - ranges = tuple(starmap(range, zip(start_pos, end_pos, steps))) + ranges = tuple(starmap(range, zip(start_pos, image_size, steps))) # choose patches by applying product to the ranges for position in product(*ranges): From 0128c3c16ea6e7d53f40bac49a9bfe2fac4db227 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Mon, 23 May 2022 23:55:39 +0000 Subject: [PATCH 20/30] Separate overlap for each dimension Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/utils.py | 26 +++++++++++++++----------- monai/data/wsi_datasets.py | 5 +++-- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/monai/data/utils.py b/monai/data/utils.py index d42aac9708..b79fe66b08 100644 --- a/monai/data/utils.py +++ b/monai/data/utils.py @@ -126,7 +126,7 @@ def iter_patch_slices( image_size: Sequence[int], patch_size: Union[Sequence[int], int], start_pos: Sequence[int] = (), - overlap: float = 0.0, + overlap: Union[Sequence[float], float] = 0.0, ) -> Generator[Tuple[slice, ...], None, None]: """ Yield successive tuples of slices defining patches of size `patch_size` from an array of dimensions `dims`. 
The @@ -137,7 +137,8 @@ def iter_patch_slices( image_size: dimensions of array to iterate over patch_size: size of patches to generate slices for, 0 or None selects whole dimension start_pos: starting position in the array, default is 0 for each dimension - overlap: the amount of overlap between patches, which is between 0.0 and 1.0. Defaults to 0.0. + overlap: the amount of overlap of neighboring patches in each dimension (a value between 0.0 and 1.0). + If only one float number is given, it will be applied to all dimensions. Defaults to 0.0. Yields: Tuples of slice objects defining each patch @@ -147,9 +148,10 @@ def iter_patch_slices( ndim = len(image_size) patch_size_ = get_valid_patch_size(image_size, patch_size) start_pos = ensure_tuple_size(start_pos, ndim) + overlap = ensure_tuple_rep(overlap, ndim) # calculate steps, which depends on the amount of overlap - steps = tuple(round(p * (1.0 - overlap)) for p in patch_size_) + steps = tuple(round(p * (1.0 - o)) for p, o in zip(patch_size_, overlap)) # collect the ranges to step over each dimension ranges = tuple(starmap(range, zip(start_pos, image_size, steps))) @@ -203,7 +205,7 @@ def iter_patch( arr: np.ndarray, patch_size: Union[Sequence[int], int] = 0, start_pos: Sequence[int] = (), - overlap: float = 0.0, + overlap: Union[Sequence[float], float] = 0.0, copy_back: bool = True, mode: Union[NumpyPadMode, str] = NumpyPadMode.WRAP, **pad_opts: Dict, @@ -217,8 +219,8 @@ def iter_patch( arr: array to iterate over patch_size: size of patches to generate slices for, 0 or None selects whole dimension start_pos: starting position in the array, default is 0 for each dimension - overlap: the amount of overlap between patches, which is between 0.0 and 1.0. Defaults to 0.0. - copy_back: if True data from the yielded patches is copied back to `arr` once the generator completes + overlap: the amount of overlap of neighboring patches in each dimension (a value between 0.0 and 1.0). 
+ If only one float number is given, it will be applied to all dimensions. Defaults to 0.0. copy_back: if True data from the yielded patches is copied back to `arr` once the generator completes mode: {``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} One of the listed string values or a user supplied function. Defaults to ``"wrap"``. @@ -267,13 +269,13 @@ def iter_wsi_patch_location( image_size: Sequence[int], patch_size: Union[Sequence[int], int], start_pos: Sequence[int] = (), - overlap: float = 0.0, + overlap: Union[Sequence[float], float] = 0.0, downsample: float = 1.0, padded: bool = False, ): """ Yield successive tuple of locations defining a patch of size `patch_size` from an image of size `image_size`, - with the relative overalpping of `overlap`. The patch is in the resolution level related to `downsample` ratio. + with the relative overlapping of `overlap`. The patch is in the resolution level related to `downsample` ratio. The iteration starts from position `start_pos` in the whole slide image, or starting at the origin if this isn't provided. @@ -281,7 +283,8 @@ def iter_wsi_patch_location( image_size: dimensions of image patch_size: size of patches to generate slices for, 0 or None selects whole dimension start_pos: starting position in the image, default is 0 for each dimension - overlap: the amount of overlap between patches, which is between 0.0 and 1.0. Defaults to 0.0. + overlap: the amount of overlap of neighboring patches in each dimension (a value between 0.0 and 1.0). + If only one float number is given, it will be applied to all dimensions. Defaults to 0.0. downsample: the downsample ratio padded: if the image is padded so the patches can go beyond the borders. Defaults to False. 
Note that the padding depends on the functionality of the underlying whole slide imaging reader, @@ -292,13 +295,14 @@ def iter_wsi_patch_location( """ ndim = len(image_size) patch_size = get_valid_patch_size(image_size, patch_size) - start_pos = ensure_tuple_size(start_pos, ndim) + start_pos = ensure_tuple_rep(start_pos, ndim) + overlap = ensure_tuple_rep(overlap, ndim) # get the patch size at level=0 patch_size_ = tuple(p * downsample for p in patch_size) # calculate steps, which depends on the amount of overlap - steps = tuple(round(p * (1.0 - overlap)) for p in patch_size_) + steps = tuple(round(p * (1.0 - o)) for p, o in zip(patch_size_, overlap)) # calculate the last starting location (depending on the padding) end_pos = image_size if padded else tuple(s - round(p) + 1 for s, p in zip(image_size, patch_size_)) diff --git a/monai/data/wsi_datasets.py b/monai/data/wsi_datasets.py index c6cf0d5704..79350694f0 100644 --- a/monai/data/wsi_datasets.py +++ b/monai/data/wsi_datasets.py @@ -149,7 +149,8 @@ class SlidingPatchWSIDataset(PatchWSIDataset): data: the list of input samples including image, location, and label (see the note below for more details). size: the size of patch to be extracted from the whole slide image. level: the level at which the patches to be extracted (default to 0). - overlap: the relative amount of overlap (between 0 and 1) for patches in each direction. Defaults to 0. + overlap: the amount of overlap of neighboring patches in each dimension (a value between 0.0 and 1.0). + If only one float number is given, it will be applied to all dimensions. Defaults to 0.0. transform: transforms to be executed on input data. reader: the module to be used for loading whole slide imaging. Defaults to cuCIM. 
If `reader` is @@ -176,7 +177,7 @@ def __init__( data: Sequence, size: Optional[Union[int, Tuple[int, int]]] = None, level: Optional[int] = None, - overlap: float = 0, + overlap: Union[Sequence[float], float] = 0.0, transform: Optional[Callable] = None, reader="cuCIM", **kwargs, From 5466063ff868dec6d401b1bc15ab0754ab618a37 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Tue, 24 May 2022 00:08:48 +0000 Subject: [PATCH 21/30] Fix docstring issue Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/monai/data/utils.py b/monai/data/utils.py index b79fe66b08..3b0d0ccc34 100644 --- a/monai/data/utils.py +++ b/monai/data/utils.py @@ -220,7 +220,8 @@ def iter_patch( patch_size: size of patches to generate slices for, 0 or None selects whole dimension start_pos: starting position in the array, default is 0 for each dimension overlap: the amount of overlap of neighboring patches in each dimension (a value between 0.0 and 1.0). - If only one float number is given, it will be applied to all dimensions. Defaults to 0.0. copy_back: if True data from the yielded patches is copied back to `arr` once the generator completes + If only one float number is given, it will be applied to all dimensions. Defaults to 0.0. + copy_back: if True data from the yielded patches is copied back to `arr` once the generator completes mode: {``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} One of the listed string values or a user supplied function. Defaults to ``"wrap"``. 
From eb7d3f2eef5f2d02845022d15ce22ec86f4d4a26 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Tue, 24 May 2022 00:53:43 +0000 Subject: [PATCH 22/30] Remove patch number Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/wsi_datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monai/data/wsi_datasets.py b/monai/data/wsi_datasets.py index 79350694f0..c38cc40dc0 100644 --- a/monai/data/wsi_datasets.py +++ b/monai/data/wsi_datasets.py @@ -212,7 +212,7 @@ def _evaluate_patch_coordinates(self, sample): sample["size"] = patch_size sample["level"] = level n_patches = len(locations) - return [{**sample, "location": locations[i], "patch_num": i, "n_patches": n_patches} for i in range(n_patches)] + return [{**sample, "location": loc, "num_patches": n_patches} for loc in locations] def _get_location(self, sample: Dict): return sample["location"] From f6cd795134d4ba79e5e9f7b590f7247f1329f99b Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Tue, 24 May 2022 12:11:55 +0000 Subject: [PATCH 23/30] Change to get_downsample_ratio Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/wsi_datasets.py | 2 +- monai/data/wsi_reader.py | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/monai/data/wsi_datasets.py b/monai/data/wsi_datasets.py index c38cc40dc0..59296c0317 100644 --- a/monai/data/wsi_datasets.py +++ b/monai/data/wsi_datasets.py @@ -198,7 +198,7 @@ def _evaluate_patch_coordinates(self, sample): level = self._get_level(sample) wsi_size = self.wsi_reader.get_size(wsi_obj, 0) - downsample = self.wsi_reader.get_downsample(wsi_obj, level) + downsample = self.wsi_reader.get_downsample_ratio(wsi_obj, level) locations = list( iter_wsi_patch_location( diff --git a/monai/data/wsi_reader.py b/monai/data/wsi_reader.py index 4c2bfc5000..fdf7de3d63 100644 --- a/monai/data/wsi_reader.py +++ 
b/monai/data/wsi_reader.py @@ -86,9 +86,9 @@ def get_level_count(self, wsi) -> int: raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") @abstractmethod - def get_downsample(self, wsi, level: int) -> float: + def get_downsample_ratio(self, wsi, level: int) -> float: """ - Returns the downsample ratio of the whole slide image at a given level. + Returns the down-sampling ratio of the whole slide image at a given level. Args: wsi: a whole slide image object loaded from a file @@ -302,16 +302,16 @@ def get_size(self, wsi, level: int) -> Tuple[int, int]: """ return self.reader.get_size(wsi, level) - def get_downsample(self, wsi, level: int) -> float: + def get_downsample_ratio(self, wsi, level: int) -> float: """ - Returns the downsample ratio of the whole slide image at a given level. + Returns the down-sampling ratio of the whole slide image at a given level. Args: wsi: a whole slide image object loaded from a file level: the level number where the size is calculated """ - return self.reader.get_downsample(wsi, level) + return self.reader.get_downsample_ratio(wsi, level) def get_file_path(self, wsi) -> str: """Return the file path for the WSI object""" @@ -393,9 +393,9 @@ def get_size(wsi, level: int) -> Tuple[int, int]: return (wsi.resolutions["level_dimensions"][level][1], wsi.resolutions["level_dimensions"][level][0]) @staticmethod - def get_downsample(wsi, level: int) -> float: + def get_downsample_ratio(wsi, level: int) -> float: """ - Returns the downsample ratio of the whole slide image at a given level. + Returns the down-sampling ratio of the whole slide image at a given level. 
Args: wsi: a whole slide image object loaded from a file @@ -511,9 +511,9 @@ def get_size(wsi, level: int) -> Tuple[int, int]: return (wsi.level_dimensions[level][1], wsi.level_dimensions[level][0]) @staticmethod - def get_downsample(wsi, level: int) -> float: + def get_downsample_ratio(wsi, level: int) -> float: """ - Returns the downsample ratio of the whole slide image at a given level. + Returns the down-sampling ratio of the whole slide image at a given level. Args: wsi: a whole slide image object loaded from a file From e5cb3abedde393333479cc1288a9b60b62b86e3b Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Tue, 24 May 2022 12:15:43 +0000 Subject: [PATCH 24/30] minor fixes Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/monai/data/utils.py b/monai/data/utils.py index 3b0d0ccc34..b01f16f00a 100644 --- a/monai/data/utils.py +++ b/monai/data/utils.py @@ -286,10 +286,10 @@ def iter_wsi_patch_location( start_pos: starting position in the image, default is 0 for each dimension overlap: the amount of overlap of neighboring patches in each dimension (a value between 0.0 and 1.0). If only one float number is given, it will be applied to all dimensions. Defaults to 0.0. - downsample: the downsample ratio + downsample: the down-sampling ratio of the patch with respect to level 0. padded: if the image is padded so the patches can go beyond the borders. Defaults to False. Note that the padding depends on the functionality of the underlying whole slide imaging reader, - and is not guranteed for all images. + and is not guaranteed for all images. 
Yields: Tuple of patch location @@ -309,7 +309,7 @@ def iter_wsi_patch_location( end_pos = image_size if padded else tuple(s - round(p) + 1 for s, p in zip(image_size, patch_size_)) # evaluate the starting locations for patches - return product(*tuple(starmap(range, zip(start_pos, end_pos, steps)))) + return product(*starmap(range, zip(start_pos, end_pos, steps))) def get_valid_patch_size(image_size: Sequence[int], patch_size: Union[Sequence[int], int]) -> Tuple[int, ...]: From 7437854572d0eb5137d0972abacce46ed75021db Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Tue, 24 May 2022 14:13:02 +0000 Subject: [PATCH 25/30] Implemenet random offset Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/wsi_datasets.py | 55 +++++++++++++++++++++---- tests/test_sliding_patch_wsi_dataset.py | 28 ++++++++++++- 2 files changed, 75 insertions(+), 8 deletions(-) diff --git a/monai/data/wsi_datasets.py b/monai/data/wsi_datasets.py index 59296c0317..59f39b0c4a 100644 --- a/monai/data/wsi_datasets.py +++ b/monai/data/wsi_datasets.py @@ -15,7 +15,7 @@ import numpy as np from monai.data import Dataset -from monai.data.utils import iter_wsi_patch_location +from monai.data.utils import iter_patch_location from monai.data.wsi_reader import BaseWSIReader, WSIReader from monai.transforms import apply_transform from monai.utils import ensure_tuple_rep @@ -149,6 +149,9 @@ class SlidingPatchWSIDataset(PatchWSIDataset): data: the list of input samples including image, location, and label (see the note below for more details). size: the size of patch to be extracted from the whole slide image. level: the level at which the patches to be extracted (default to 0). + offset: the offset of image to extract patches (the starting position of the upper left patch). 
+ offset_limits: if offset is set to "random", a tuple of integers defining the lower and upper limit of the + random offset for all dimensions, or a tuple of tuples that defines the limits for each dimension. overlap: the amount of overlap of neighboring patches in each dimension (a value between 0.0 and 1.0). If only one float number is given, it will be applied to all dimensions. Defaults to 0.0. transform: transforms to be executed on input data. @@ -177,7 +180,9 @@ def __init__( data: Sequence, size: Optional[Union[int, Tuple[int, int]]] = None, level: Optional[int] = None, - overlap: Union[Sequence[float], float] = 0.0, + overlap: Union[Tuple[float, float], float] = 0.0, + offset: Union[Tuple[int, int], int, str] = (0, 0), + offset_limits: Optional[Union[Tuple[Tuple[int, int], Tuple[int, int]], Tuple[int, int]]] = None, transform: Optional[Callable] = None, reader="cuCIM", **kwargs, @@ -185,27 +190,63 @@ def __init__( super().__init__(data=data, size=size, level=level, transform=transform, reader=reader, **kwargs) self.overlap = overlap + # Set the offset config + self.random_offset = False + if isinstance(offset, str): + if offset == "random": + self.random_offset = True + self.offset_limits: Optional[Tuple[Tuple[int, int], Tuple[int, int]]] + if offset_limits is None: + self.offset_limits = None + elif isinstance(offset_limits, tuple): + if isinstance(offset_limits[0], int): + self.offset_limits = (offset_limits, offset_limits) + elif isinstance(offset_limits[0], tuple): + self.offset_limits = offset_limits + else: + ValueError( + "The offset limits should be either a tuple of integers or tuple of tuple of integers." + ) + else: + ValueError("The offset limits should be a tuple.") + else: + ValueError( + f'Invalid string for offset "{offset}". It should be either "random" as a string,' + "an integer, or a tuple of integers defining the offset." 
+ ) + else: + self.offset = ensure_tuple_rep(offset, 2) + # Create single sample for each patch (in a sliding window manner) self.data = [] for sample in data: sliding_samples = self._evaluate_patch_coordinates(sample) self.data.extend(sliding_samples) + def _get_offset(self, sample): + if self.random_offset: + if self.offset_limits is None: + offset_limits = tuple(tuple([-s, s]) for s in self._get_size(sample)) + else: + offset_limits = self.offset_limits + return tuple(np.random.randint(low, high) for low, high in offset_limits) + return self.offset + def _evaluate_patch_coordinates(self, sample): """Define the location for each patch based on sliding-window approach""" wsi_obj = self._get_wsi_object(sample) patch_size = self._get_size(sample) level = self._get_level(sample) + start_pos = self._get_offset(sample) wsi_size = self.wsi_reader.get_size(wsi_obj, 0) downsample = self.wsi_reader.get_downsample_ratio(wsi_obj, level) - + patch_size_ = tuple(p * downsample for p in patch_size) # patch size at level 0 locations = list( - iter_wsi_patch_location( + iter_patch_location( image_size=wsi_size, - patch_size=patch_size, - start_pos=(0, 0), - downsample=downsample, + patch_size=patch_size_, + start_pos=start_pos, overlap=self.overlap, ) ) diff --git a/tests/test_sliding_patch_wsi_dataset.py b/tests/test_sliding_patch_wsi_dataset.py index 0ab5bfca46..27f4e687ad 100644 --- a/tests/test_sliding_patch_wsi_dataset.py +++ b/tests/test_sliding_patch_wsi_dataset.py @@ -17,9 +17,11 @@ from parameterized import parameterized from monai.data import SlidingPatchWSIDataset -from monai.utils import optional_import +from monai.utils import optional_import, set_determinism from tests.utils import download_url_or_skip_test, testing_data_config +set_determinism(0) + cucim, has_cucim = optional_import("cucim") has_cucim = has_cucim and hasattr(cucim, "CuImage") openslide, has_osl = optional_import("openslide") @@ -134,6 +136,26 @@ ], ] + +TEST_CASE_SMALL_7 = [ + {"data": [{"image": 
FILE_PATH_SMALL_0, "level": 0, "size": (2, 2)}], "offset": (1, 0)}, + [{"image": ARRAY_SMALL_0[:, 1:3, :2]}, {"image": ARRAY_SMALL_0[:, 1:3, 2:]}], +] + +TEST_CASE_SMALL_8 = [ + {"data": [{"image": FILE_PATH_SMALL_0, "level": 0, "size": (2, 2)}], "offset": "random", "offset_limits": (0, 2)}, + [{"image": ARRAY_SMALL_0[:, :2, 1:3]}, {"image": ARRAY_SMALL_0[:, 2:, 1:3]}], +] + +TEST_CASE_SMALL_9 = [ + { + "data": [{"image": FILE_PATH_SMALL_0, "level": 0, "size": (2, 2)}], + "offset": "random", + "offset_limits": ((0, 2), (0, 3)), + }, + [{"image": ARRAY_SMALL_0[:, :2, 1:3]}, {"image": ARRAY_SMALL_0[:, 2:, 1:3]}], +] + TEST_CASE_LARGE_0 = [ {"data": [{"image": FILE_PATH, "level": 8, "size": (64, 50)}]}, [ @@ -193,6 +215,9 @@ class Tests(unittest.TestCase): TEST_CASE_SMALL_4, TEST_CASE_SMALL_5, TEST_CASE_SMALL_6, + TEST_CASE_SMALL_7, + TEST_CASE_SMALL_8, + TEST_CASE_SMALL_9, ] ) def test_read_patches(self, input_parameters, expected): @@ -201,6 +226,7 @@ def test_read_patches(self, input_parameters, expected): dataset = SlidingPatchWSIDataset(reader=self.backend, **input_parameters) self.assertEqual(len(dataset), len(expected)) for i, sample in enumerate(dataset): + # print(f"{sample=}") self.assertTupleEqual(sample["image"].shape, expected[i]["image"].shape) @parameterized.expand([TEST_CASE_LARGE_0, TEST_CASE_LARGE_1]) From 441abd29616eaba9fd8155e78e1d56fb0d63b300 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Tue, 24 May 2022 14:34:03 +0000 Subject: [PATCH 26/30] Combine iter_patch_slices and iter_wsi_patch_locations Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/__init__.py | 2 +- monai/data/utils.py | 138 +++++++++++++------------------------ monai/data/wsi_datasets.py | 11 ++- 3 files changed, 54 insertions(+), 97 deletions(-) diff --git a/monai/data/__init__.py b/monai/data/__init__.py index 66f8efe9b9..a61875075f 100644 --- a/monai/data/__init__.py +++ b/monai/data/__init__.py @@ -83,7 +83,7 
@@ get_valid_patch_size, is_supported_format, iter_patch, - iter_patch_slices, + iter_patch_position, json_hashing, list_data_collate, orientation_ras_lps, diff --git a/monai/data/utils.py b/monai/data/utils.py index b01f16f00a..8d28863b10 100644 --- a/monai/data/utils.py +++ b/monai/data/utils.py @@ -21,7 +21,7 @@ from functools import reduce from itertools import product, starmap, zip_longest from pathlib import PurePath -from typing import Any, Dict, Generator, Iterable, List, Mapping, Optional, Sequence, Tuple, Union +from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence, Tuple, Union import numpy as np import torch @@ -67,7 +67,7 @@ "get_valid_patch_size", "is_supported_format", "iter_patch", - "iter_patch_slices", + "iter_patch_position", "json_hashing", "list_data_collate", "no_collation", @@ -122,45 +122,6 @@ def get_random_patch( return tuple(slice(mc, mc + ps) for mc, ps in zip(min_corner, patch_size)) -def iter_patch_slices( - image_size: Sequence[int], - patch_size: Union[Sequence[int], int], - start_pos: Sequence[int] = (), - overlap: Union[Sequence[float], float] = 0.0, -) -> Generator[Tuple[slice, ...], None, None]: - """ - Yield successive tuples of slices defining patches of size `patch_size` from an array of dimensions `dims`. The - iteration starts from position `start_pos` in the array, or starting at the origin if this isn't provided. Each - patch is chosen in a contiguous grid using a first dimension as least significant ordering. - - Args: - image_size: dimensions of array to iterate over - patch_size: size of patches to generate slices for, 0 or None selects whole dimension - start_pos: starting position in the array, default is 0 for each dimension - overlap: the amount of overlap of neighboring patches in each dimension (a value between 0.0 and 1.0). - If only one float number is given, it will be applied to all dimensions. Defaults to 0.0. 
- - Yields: - Tuples of slice objects defining each patch - """ - - # ensure patchSize and startPos are the right length - ndim = len(image_size) - patch_size_ = get_valid_patch_size(image_size, patch_size) - start_pos = ensure_tuple_size(start_pos, ndim) - overlap = ensure_tuple_rep(overlap, ndim) - - # calculate steps, which depends on the amount of overlap - steps = tuple(round(p * (1.0 - o)) for p, o in zip(patch_size_, overlap)) - - # collect the ranges to step over each dimension - ranges = tuple(starmap(range, zip(start_pos, image_size, steps))) - - # choose patches by applying product to the ranges - for position in product(*ranges): - yield tuple(slice(s, s + p) for s, p in zip(position, patch_size_)) - - def dense_patch_slices( image_size: Sequence[int], patch_size: Sequence[int], scan_interval: Sequence[int] ) -> List[Tuple[slice, ...]]: @@ -201,6 +162,48 @@ def dense_patch_slices( return [tuple(slice(s, s + patch_size[d]) for d, s in enumerate(x)) for x in out] +def iter_patch_position( + image_size: Sequence[int], + patch_size: Union[Sequence[int], int], + start_pos: Sequence[int] = (), + overlap: Union[Sequence[float], float] = 0.0, + padded: bool = False, +): + """ + Yield successive tuples of slices defining patches of size `patch_size` from an array of dimensions `dims`. The + iteration starts from position `start_pos` in the array, or starting at the origin if this isn't provided. Each + patch is chosen in a contiguous grid using a first dimension as least significant ordering. + + Args: + image_size: dimensions of array to iterate over + patch_size: size of patches to generate slices for, 0 or None selects whole dimension + start_pos: starting position in the array, default is 0 for each dimension + overlap: the amount of overlap of neighboring patches in each dimension (a value between 0.0 and 1.0). + If only one float number is given, it will be applied to all dimensions. Defaults to 0.0. 
+ + Yields: + Tuples of positions defining the upper left corner of each patch + """ + + # ensure patchSize and startPos are the right length + ndim = len(image_size) + patch_size_ = get_valid_patch_size(image_size, patch_size) + start_pos = ensure_tuple_size(start_pos, ndim) + overlap = ensure_tuple_rep(overlap, ndim) + + # calculate steps, which depends on the amount of overlap + steps = tuple(round(p * (1.0 - o)) for p, o in zip(patch_size_, overlap)) + + # calculate the last starting location (depending on the padding) + end_pos = image_size if padded else tuple(s - round(p) + 1 for s, p in zip(image_size, patch_size_)) + + # collect the ranges to step over each dimension + ranges = starmap(range, zip(start_pos, end_pos, steps)) + + # choose patches by applying product to the ranges + return product(*ranges) + + def iter_patch( arr: np.ndarray, patch_size: Union[Sequence[int], int] = 0, @@ -255,9 +258,12 @@ def iter_patch( # patches which are only in the padded regions iter_size = tuple(s + p for s, p in zip(arr.shape, patch_size_)) - for slices in iter_patch_slices(iter_size, patch_size_, start_pos_padded, overlap): + for position in iter_patch_position( + image_size=iter_size, patch_size=patch_size_, start_pos=start_pos_padded, overlap=overlap, padded=True + ): + slices = tuple(slice(s, s + p) for s, p in zip(position, patch_size_)) # compensate original image padding - coords_no_pad = tuple((coord.start - p, coord.stop - p) for coord, p in zip(slices, patch_size_)) + coords_no_pad = tuple((s - p, s) for s, p in zip(position, patch_size_)) yield arrpad[slices], np.asarray(coords_no_pad) # data and coords (in numpy; works with torch loader) # copy back data from the padded image if required @@ -266,52 +272,6 @@ def iter_patch( arr[...] 
= arrpad[slices] -def iter_wsi_patch_location( - image_size: Sequence[int], - patch_size: Union[Sequence[int], int], - start_pos: Sequence[int] = (), - overlap: Union[Sequence[float], float] = 0.0, - downsample: float = 1.0, - padded: bool = False, -): - """ - Yield successive tuple of locations defining a patch of size `patch_size` from an image of size `image_size`, - with the relative overlapping of `overlap`. The patch is in the resolution level related to `downsample` ratio. - The iteration starts from position `start_pos` in the whole slide image, or starting at the origin if this isn't - provided. - - Args: - image_size: dimensions of image - patch_size: size of patches to generate slices for, 0 or None selects whole dimension - start_pos: starting position in the image, default is 0 for each dimension - overlap: the amount of overlap of neighboring patches in each dimension (a value between 0.0 and 1.0). - If only one float number is given, it will be applied to all dimensions. Defaults to 0.0. - downsample: the down-sampling ratio of the patch with respect to level 0. - padded: if the image is padded so the patches can go beyond the borders. Defaults to False. - Note that the padding depends on the functionality of the underlying whole slide imaging reader, - and is not guaranteed for all images. 
- - Yields: - Tuple of patch location - """ - ndim = len(image_size) - patch_size = get_valid_patch_size(image_size, patch_size) - start_pos = ensure_tuple_rep(start_pos, ndim) - overlap = ensure_tuple_rep(overlap, ndim) - - # get the patch size at level=0 - patch_size_ = tuple(p * downsample for p in patch_size) - - # calculate steps, which depends on the amount of overlap - steps = tuple(round(p * (1.0 - o)) for p, o in zip(patch_size_, overlap)) - - # calculate the last starting location (depending on the padding) - end_pos = image_size if padded else tuple(s - round(p) + 1 for s, p in zip(image_size, patch_size_)) - - # evaluate the starting locations for patches - return product(*starmap(range, zip(start_pos, end_pos, steps))) - - def get_valid_patch_size(image_size: Sequence[int], patch_size: Union[Sequence[int], int]) -> Tuple[int, ...]: """ Given an image of dimensions `image_size`, return a patch size tuple taking the dimension from `patch_size` if this is diff --git a/monai/data/wsi_datasets.py b/monai/data/wsi_datasets.py index 59f39b0c4a..8620d966c9 100644 --- a/monai/data/wsi_datasets.py +++ b/monai/data/wsi_datasets.py @@ -15,7 +15,7 @@ import numpy as np from monai.data import Dataset -from monai.data.utils import iter_patch_location +from monai.data.utils import iter_patch_position from monai.data.wsi_reader import BaseWSIReader, WSIReader from monai.transforms import apply_transform from monai.utils import ensure_tuple_rep @@ -226,7 +226,7 @@ def __init__( def _get_offset(self, sample): if self.random_offset: if self.offset_limits is None: - offset_limits = tuple(tuple([-s, s]) for s in self._get_size(sample)) + offset_limits = tuple((-s, s) for s in self._get_size(sample)) else: offset_limits = self.offset_limits return tuple(np.random.randint(low, high) for low, high in offset_limits) @@ -243,11 +243,8 @@ def _evaluate_patch_coordinates(self, sample): downsample = self.wsi_reader.get_downsample_ratio(wsi_obj, level) patch_size_ = tuple(p * 
downsample for p in patch_size) # patch size at level 0 locations = list( - iter_patch_location( - image_size=wsi_size, - patch_size=patch_size_, - start_pos=start_pos, - overlap=self.overlap, + iter_patch_position( + image_size=wsi_size, patch_size=patch_size_, start_pos=start_pos, overlap=self.overlap, padded=False ) ) sample["size"] = patch_size From d998eb5a4203bc84a3a297d125d8758b56e9a75d Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Wed, 25 May 2022 02:40:23 +0000 Subject: [PATCH 27/30] Update iter_patch_slices Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/utils.py | 45 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 6 deletions(-) diff --git a/monai/data/utils.py b/monai/data/utils.py index 8d28863b10..0957de9510 100644 --- a/monai/data/utils.py +++ b/monai/data/utils.py @@ -21,7 +21,7 @@ from functools import reduce from itertools import product, starmap, zip_longest from pathlib import PurePath -from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence, Tuple, Union +from typing import Any, Dict, Generator, Iterable, List, Mapping, Optional, Sequence, Tuple, Union import numpy as np import torch @@ -68,6 +68,7 @@ "is_supported_format", "iter_patch", "iter_patch_position", + "iter_patch_slices", "json_hashing", "list_data_collate", "no_collation", @@ -162,6 +163,40 @@ def dense_patch_slices( return [tuple(slice(s, s + patch_size[d]) for d, s in enumerate(x)) for x in out] +def iter_patch_slices( + image_size: Sequence[int], + patch_size: Union[Sequence[int], int], + start_pos: Sequence[int] = (), + overlap: Union[Sequence[float], float] = 0.0, + padded: bool = True, +) -> Generator[Tuple[slice, ...], None, None]: + """ + Yield successive tuples of slices defining patches of size `patch_size` from an array of dimensions `dims`. The + iteration starts from position `start_pos` in the array, or starting at the origin if this isn't provided. 
Each + patch is chosen in a contiguous grid using a first dimension as least significant ordering. + + Args: + image_size: dimensions of array to iterate over + patch_size: size of patches to generate slices for, 0 or None selects whole dimension + start_pos: starting position in the array, default is 0 for each dimension + overlap: the amount of overlap of neighboring patches in each dimension (a value between 0.0 and 1.0). + If only one float number is given, it will be applied to all dimensions. Defaults to 0.0. + padded: if the image is padded so the patches can go beyond the borders. Defaults to False. + + Yields: + Tuples of slice objects defining each patch + """ + + # ensure patch_size has the right length + patch_size_ = get_valid_patch_size(image_size, patch_size) + + # create slices based on start position of each patch + for position in iter_patch_position( + image_size=image_size, patch_size=patch_size_, start_pos=start_pos, overlap=overlap, padded=padded + ): + yield tuple(slice(s, s + p) for s, p in zip(position, patch_size_)) + + def iter_patch_position( image_size: Sequence[int], patch_size: Union[Sequence[int], int], @@ -180,6 +215,7 @@ def iter_patch_position( start_pos: starting position in the array, default is 0 for each dimension overlap: the amount of overlap of neighboring patches in each dimension (a value between 0.0 and 1.0). If only one float number is given, it will be applied to all dimensions. Defaults to 0.0. + padded: if the image is padded so the patches can go beyond the borders. Defaults to False. 
Yields: Tuples of positions defining the upper left corner of each patch @@ -258,12 +294,9 @@ def iter_patch( # patches which are only in the padded regions iter_size = tuple(s + p for s, p in zip(arr.shape, patch_size_)) - for position in iter_patch_position( - image_size=iter_size, patch_size=patch_size_, start_pos=start_pos_padded, overlap=overlap, padded=True - ): - slices = tuple(slice(s, s + p) for s, p in zip(position, patch_size_)) + for slices in iter_patch_slices(iter_size, patch_size_, start_pos_padded, overlap): # compensate original image padding - coords_no_pad = tuple((s - p, s) for s, p in zip(position, patch_size_)) + coords_no_pad = tuple((coord.start - p, coord.stop - p) for coord, p in zip(slices, patch_size_)) yield arrpad[slices], np.asarray(coords_no_pad) # data and coords (in numpy; works with torch loader) # copy back data from the padded image if required From 544712809b666e0087e2b17b530860eb69dd54c5 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Wed, 25 May 2022 12:14:48 +0000 Subject: [PATCH 28/30] Move iter_patch_slices and fix docstrings Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/utils.py | 74 ++++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/monai/data/utils.py b/monai/data/utils.py index 0957de9510..e56d8b86e9 100644 --- a/monai/data/utils.py +++ b/monai/data/utils.py @@ -123,6 +123,40 @@ def get_random_patch( return tuple(slice(mc, mc + ps) for mc, ps in zip(min_corner, patch_size)) +def iter_patch_slices( + image_size: Sequence[int], + patch_size: Union[Sequence[int], int], + start_pos: Sequence[int] = (), + overlap: Union[Sequence[float], float] = 0.0, + padded: bool = True, +) -> Generator[Tuple[slice, ...], None, None]: + """ + Yield successive tuples of slices defining patches of size `patch_size` from an array of dimensions `image_size`. 
+ The iteration starts from position `start_pos` in the array, or starting at the origin if this isn't provided. Each + patch is chosen in a contiguous grid using a row-major ordering. + + Args: + image_size: dimensions of array to iterate over + patch_size: size of patches to generate slices for, 0 or None selects whole dimension + start_pos: starting position in the array, default is 0 for each dimension + overlap: the amount of overlap of neighboring patches in each dimension (a value between 0.0 and 1.0). + If only one float number is given, it will be applied to all dimensions. Defaults to 0.0. + padded: if the image is padded so the patches can go beyond the borders. Defaults to True. + + Yields: + Tuples of slice objects defining each patch + """ + + # ensure patch_size has the right length + patch_size_ = get_valid_patch_size(image_size, patch_size) + + # create slices based on start position of each patch + for position in iter_patch_position( + image_size=image_size, patch_size=patch_size_, start_pos=start_pos, overlap=overlap, padded=padded + ): + yield tuple(slice(s, s + p) for s, p in zip(position, patch_size_)) + + def dense_patch_slices( image_size: Sequence[int], patch_size: Sequence[int], scan_interval: Sequence[int] ) -> List[Tuple[slice, ...]]: @@ -163,40 +197,6 @@ def dense_patch_slices( return [tuple(slice(s, s + patch_size[d]) for d, s in enumerate(x)) for x in out] -def iter_patch_slices( - image_size: Sequence[int], - patch_size: Union[Sequence[int], int], - start_pos: Sequence[int] = (), - overlap: Union[Sequence[float], float] = 0.0, - padded: bool = True, -) -> Generator[Tuple[slice, ...], None, None]: - """ - Yield successive tuples of slices defining patches of size `patch_size` from an array of dimensions `dims`. The - iteration starts from position `start_pos` in the array, or starting at the origin if this isn't provided. Each - patch is chosen in a contiguous grid using a first dimension as least significant ordering.
- - Args: - image_size: dimensions of array to iterate over - patch_size: size of patches to generate slices for, 0 or None selects whole dimension - start_pos: starting position in the array, default is 0 for each dimension - overlap: the amount of overlap of neighboring patches in each dimension (a value between 0.0 and 1.0). - If only one float number is given, it will be applied to all dimensions. Defaults to 0.0. - padded: if the image is padded so the patches can go beyond the borders. Defaults to False. - - Yields: - Tuples of slice objects defining each patch - """ - - # ensure patch_size has the right length - patch_size_ = get_valid_patch_size(image_size, patch_size) - - # create slices based on start position of each patch - for position in iter_patch_position( - image_size=image_size, patch_size=patch_size_, start_pos=start_pos, overlap=overlap, padded=padded - ): - yield tuple(slice(s, s + p) for s, p in zip(position, patch_size_)) - - def iter_patch_position( image_size: Sequence[int], patch_size: Union[Sequence[int], int], @@ -205,9 +205,9 @@ def iter_patch_position( padded: bool = False, ): """ - Yield successive tuples of slices defining patches of size `patch_size` from an array of dimensions `dims`. The - iteration starts from position `start_pos` in the array, or starting at the origin if this isn't provided. Each - patch is chosen in a contiguous grid using a first dimension as least significant ordering. + Yield successive tuples of upper left corner of patches of size `patch_size` from an array of dimensions `image_size`. + The iteration starts from position `start_pos` in the array, or starting at the origin if this isn't provided. Each + patch is chosen in a contiguous grid using a row-major ordering.
Args: image_size: dimensions of array to iterate over From 3ebd8c113ad1f07ef8d328226b00cf10249d484c Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Wed, 25 May 2022 12:18:38 +0000 Subject: [PATCH 29/30] Update init Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/monai/data/__init__.py b/monai/data/__init__.py index a61875075f..40ee3cfc29 100644 --- a/monai/data/__init__.py +++ b/monai/data/__init__.py @@ -84,6 +84,7 @@ is_supported_format, iter_patch, iter_patch_position, + iter_patch_slices, json_hashing, list_data_collate, orientation_ras_lps, From e219e58ee04efb200bdf8724d0af13f7f9c02756 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Wed, 25 May 2022 23:58:33 +0000 Subject: [PATCH 30/30] Fixed randomness Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/wsi_datasets.py | 12 +++++++----- tests/test_sliding_patch_wsi_dataset.py | 5 ++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/monai/data/wsi_datasets.py b/monai/data/wsi_datasets.py index 8620d966c9..6fe5435d57 100644 --- a/monai/data/wsi_datasets.py +++ b/monai/data/wsi_datasets.py @@ -17,7 +17,7 @@ from monai.data import Dataset from monai.data.utils import iter_patch_position from monai.data.wsi_reader import BaseWSIReader, WSIReader -from monai.transforms import apply_transform +from monai.transforms import Randomizable, apply_transform from monai.utils import ensure_tuple_rep __all__ = ["PatchWSIDataset", "SlidingPatchWSIDataset"] @@ -140,7 +140,7 @@ def _transform(self, index: int): return apply_transform(self.transform, output) if self.transform else output -class SlidingPatchWSIDataset(PatchWSIDataset): +class SlidingPatchWSIDataset(Randomizable, PatchWSIDataset): """ This dataset extracts patches from whole slide images (without loading the whole image) It also reads labels for each patch 
and provides each patch with its associated class labels. @@ -161,6 +161,7 @@ class SlidingPatchWSIDataset(PatchWSIDataset): - a class (inherited from `BaseWSIReader`), it is initialized and set as wsi_reader, - an instance of a a class inherited from `BaseWSIReader`, it is set as the wsi_reader. + seed: random seed to randomly generate offsets. Defaults to 0. kwargs: additional arguments to pass to `WSIReader` or provided whole slide reader class Note: @@ -185,11 +186,12 @@ def __init__( offset_limits: Optional[Union[Tuple[Tuple[int, int], Tuple[int, int]], Tuple[int, int]]] = None, transform: Optional[Callable] = None, reader="cuCIM", + seed: int = 0, **kwargs, ): super().__init__(data=data, size=size, level=level, transform=transform, reader=reader, **kwargs) self.overlap = overlap - + self.set_random_state(seed) # Set the offset config self.random_offset = False if isinstance(offset, str): @@ -229,16 +231,16 @@ def _get_offset(self, sample): offset_limits = tuple((-s, s) for s in self._get_size(sample)) else: offset_limits = self.offset_limits - return tuple(np.random.randint(low, high) for low, high in offset_limits) + return tuple(self.R.randint(low, high) for low, high in offset_limits) return self.offset def _evaluate_patch_coordinates(self, sample): """Define the location for each patch based on sliding-window approach""" - wsi_obj = self._get_wsi_object(sample) patch_size = self._get_size(sample) level = self._get_level(sample) start_pos = self._get_offset(sample) + wsi_obj = self._get_wsi_object(sample) wsi_size = self.wsi_reader.get_size(wsi_obj, 0) downsample = self.wsi_reader.get_downsample_ratio(wsi_obj, level) patch_size_ = tuple(p * downsample for p in patch_size) # patch size at level 0 diff --git a/tests/test_sliding_patch_wsi_dataset.py b/tests/test_sliding_patch_wsi_dataset.py index 27f4e687ad..1eaa0292c5 100644 --- a/tests/test_sliding_patch_wsi_dataset.py +++ b/tests/test_sliding_patch_wsi_dataset.py @@ -144,14 +144,14 @@ TEST_CASE_SMALL_8 = 
[ {"data": [{"image": FILE_PATH_SMALL_0, "level": 0, "size": (2, 2)}], "offset": "random", "offset_limits": (0, 2)}, - [{"image": ARRAY_SMALL_0[:, :2, 1:3]}, {"image": ARRAY_SMALL_0[:, 2:, 1:3]}], + [{"image": ARRAY_SMALL_0[:, 1:3, :2]}, {"image": ARRAY_SMALL_0[:, 1:3, 2:]}], ] TEST_CASE_SMALL_9 = [ { "data": [{"image": FILE_PATH_SMALL_0, "level": 0, "size": (2, 2)}], "offset": "random", - "offset_limits": ((0, 2), (0, 3)), + "offset_limits": ((0, 3), (0, 2)), }, [{"image": ARRAY_SMALL_0[:, :2, 1:3]}, {"image": ARRAY_SMALL_0[:, 2:, 1:3]}], ] @@ -226,7 +226,6 @@ def test_read_patches(self, input_parameters, expected): dataset = SlidingPatchWSIDataset(reader=self.backend, **input_parameters) self.assertEqual(len(dataset), len(expected)) for i, sample in enumerate(dataset): - # print(f"{sample=}") self.assertTupleEqual(sample["image"].shape, expected[i]["image"].shape) @parameterized.expand([TEST_CASE_LARGE_0, TEST_CASE_LARGE_1])