From 753decab155fb933b6c67b83d81b632618b394d7 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Thu, 4 Feb 2021 14:18:59 -0500 Subject: [PATCH 01/35] Implement CuImageReader and OpenSlideReader Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- monai/data/image_reader.py | 213 +++++++++++++++++++++++++++++++++++-- 1 file changed, 206 insertions(+), 7 deletions(-) diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py index d0f5f4aefc..786ec6e71c 100644 --- a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -16,7 +16,7 @@ import numpy as np from torch.utils.data._utils.collate import np_str_obj_array_pattern -from monai.config import DtypeLike, KeysCollection +from monai.config import KeysCollection from monai.data.utils import correct_nifti_header_if_necessary from monai.utils import ensure_tuple, optional_import @@ -28,16 +28,20 @@ from itk import Image # type: ignore from nibabel.nifti1 import Nifti1Image from PIL import Image as PILImage + import cuimage + import openslide - has_itk = has_nib = has_pil = True + has_itk = has_nib = has_pil = has_cux = True else: itk, has_itk = optional_import("itk", allow_namespace_pkg=True) Image, _ = optional_import("itk", allow_namespace_pkg=True, name="Image") nib, has_nib = optional_import("nibabel") Nifti1Image, _ = optional_import("nibabel.nifti1", name="Nifti1Image") PILImage, has_pil = optional_import("PIL.Image") + cuimage, has_cux = optional_import("cuimage") + openslide, has_osl = optional_import("openslide") -__all__ = ["ImageReader", "ITKReader", "NibabelReader", "NumpyReader", "PILReader"] +__all__ = ["ImageReader", "ITKReader", "NibabelReader", "NumpyReader", "PILReader", "CuImageReader", "OpenSlide"] class ImageReader(ABC): @@ -244,7 +248,7 @@ def _get_affine(self, img) -> np.ndarray: affine = np.eye(direction.shape[0] + 1) affine[(slice(-1), slice(-1))] = direction @ np.diag(spacing) affine[(slice(-1), -1)] = origin - return 
np.asarray(affine) + return affine def _get_spatial_shape(self, img) -> np.ndarray: """ @@ -258,7 +262,7 @@ def _get_spatial_shape(self, img) -> np.ndarray: shape.reverse() return np.asarray(shape) - def _get_array_data(self, img): + def _get_array_data(self, img) -> np.ndarray: """ Get the raw array data of the image, converted to Numpy array. @@ -295,7 +299,7 @@ class NibabelReader(ImageReader): """ - def __init__(self, as_closest_canonical: bool = False, dtype: DtypeLike = np.float32, **kwargs): + def __init__(self, as_closest_canonical: bool = False, dtype: Optional[np.dtype] = np.float32, **kwargs): super().__init__() self.as_closest_canonical = as_closest_canonical self.dtype = dtype @@ -385,7 +389,7 @@ def _get_affine(self, img) -> np.ndarray: img: a Nibabel image object loaded from a image file. """ - return np.array(img.affine, copy=True) + return img.affine.copy() def _get_spatial_shape(self, img) -> np.ndarray: """ @@ -598,3 +602,198 @@ def _get_spatial_shape(self, img) -> np.ndarray: img: a PIL Image object loaded from a image file. """ return np.asarray((img.width, img.height)) + + +class CuImageReader(ImageReader): + """ + Extraxt 2D patches from TIFF image file(s) + + Args: + converter: additional function to convert the image data after `read()`. + """ + + def __init__(self, converter: Optional[Callable] = None): + super().__init__() + self.converter = converter + + def verify_suffix(self, filename: Union[Sequence[str], str]) -> bool: + """ + Verify whether the specified file or files format is supported by WSI reader. + + Args: + filename: file name or a list of file names to read. + if a list of files, verify all the suffixes. + """ + return has_cux and is_supported_format(filename, ["tif", "tiff"]) + + def read(self, data: Union[Sequence[str], str, np.ndarray]): + """ + Read image data from specified file or files. + Note that the returned object is CuImage or list of CuImage objects. + + Args: + data: file name or a list of file names to read. 
+ + """ + img_: List[cuimage.CuImage] = [] + + filenames: Sequence[str] = ensure_tuple(data) + for name in filenames: + img = cuimage.CuImage(name) + img_.append(img) + + return img_ if len(filenames) > 1 else img_[0] + + def get_data( + self, + img_obj, + location=(0, 0), + size=None, + level=0, + dtype=np.uint8, + grid_shape=(1, 1), + patch_size=None + ): + """ + Extract regions as numpy array from WSI image and return them. + + Args: + img: a CuImage object loaded from a file, or list of CuImage objects + location: (x_min, y_min) tuple giving the top left pixel in the level 0 reference frame, + or list of tuples (default=(0, 0)) + size: (width, height) tuple giving the region size, or list of tuples (default=(wsi_width, wsi_height)) + This is the size of image at `level=0` + level: the level number, or list of level numbers (default=0) + + """ + if size is None: + if location == (0, 0): + # the maximum size is set to WxH + size = (img_obj.shape[1], img_obj.shape[0]) + print("Size is set to maximum size: ", size) + else: + print("Size need to be provided!") + return + region = self._extract_region(img_obj, location, size, level, dtype) + + if patch_size is None: + patch_size = region.shape[1:] + patches = _extract_patches(region, grid_shape, patch_size, dtype) + return patches + + def _extract_region(self, img_obj, location=(0, 0), size=None, level=0, dtype=np.uint8): + size = [s * (2 ** level) for s in size] + region = img_obj.read_region(location=location, size=size, level=level) + region = np.asarray(region, dtype=dtype) + # CuImage: (H x W x C) -> torch image: (C X H X W) + region = region.transpose((2, 0, 1)) + return region + + +class OpenSlideReader(ImageReader): + """ + Extraxt 2D patches from TIFF image file(s) + + Args: + converter: additional function to convert the image data after `read()`. 
+ """ + + def __init__(self, converter: Optional[Callable] = None): + super().__init__() + self.converter = converter + + def verify_suffix(self, filename: Union[Sequence[str], str]) -> bool: + """ + Verify whether the specified file or files format is supported by WSI reader. + + Args: + filename: file name or a list of file names to read. + if a list of files, verify all the suffixes. + """ + return has_cux and is_supported_format(filename, ["tif", "tiff"]) + + def read(self, data: Union[Sequence[str], str, np.ndarray]): + """ + Read image data from specified file or files. + Note that the returned object is OpenSlide or list of OpenSlide objects. + + Args: + data: file name or a list of file names to read. + + """ + img_: List[openslide.OpenSlide] = [] + + filenames: Sequence[str] = ensure_tuple(data) + for name in filenames: + img = openslide.OpenSlide(name) + img_.append(img) + + return img_ if len(filenames) > 1 else img_[0] + + def get_data( + self, + img_obj, + location=(0, 0), + size=None, + level=0, + dtype=np.uint8, + grid_shape=(1, 1), + patch_size=None + ): + """ + Extract regions as numpy array from WSI image and return them. 
+ + Args: + img: a OpenSlide object loaded from a file, or list of OpenSlide objects + location: (x_min, y_min) tuple giving the top left pixel in the level 0 reference frame, + or list of tuples (default=(0, 0)) + size: (width, height) tuple giving the region size, or list of tuples (default=(wsi_width, wsi_height)) + This is the size of the image at the given `level` + level: the level number, or list of level numbers (default=0) + + """ + if size is None: + if location == (0, 0): + # the maximum size is set to WxH + size = (img_obj.shape[1], img_obj.shape[0]) + print("Size is set to maximum size: ", size) + else: + print("Size need to be provided!") + return + region = self._extract_region(img_obj, location, size, level, dtype) + + if patch_size is None: + patch_size = region.shape[1:] + patches = _extract_patches(region, grid_shape, patch_size, dtype) + return patches + + def _extract_region(self, img_obj, location=(0, 0), size=None, level=0, dtype=np.uint8): + region = img_obj.read_region(location=location, level=level, size=size) + region = region.convert("RGB") + region = np.asarray(region, dtype=dtype) + # OpenSlide: (H x W x C) -> torch image: (C X H X W) + region = region.transpose((2, 0, 1)) + return region + +def _extract_patches(region, grid_shape, patch_size, dtype=np.uint8): + if grid_shape == (1, 1): + return region + + n_patches = np.prod(grid_shape) + region_size = region.shape[1:] + + # split the region into patches on the grid and center crop them to patch size + flat_patch_grid = np.zeros((n_patches, 3, patch_size, patch_size), dtype=dtype) + start_points = [ + np.round(region_size[i] * (0.5 + np.arange(grid_shape[i])) / grid_shape[i] - patch_size / 2).astype(int) + for i in range(2) + ] + idx = 0 + for y_start in start_points[1]: + for x_start in start_points[0]: + x_end = x_start + patch_size + y_end = y_start + patch_size + flat_patch_grid[idx] = region[:, x_start:x_end, y_start:y_end] + idx += 1 + + return flat_patch_grid \ No newline at 
end of file From ddbd6abd1aa94c1716e76e5a9ed27a1da90736ee Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Thu, 4 Feb 2021 14:21:16 -0500 Subject: [PATCH 02/35] Add unittests for CuImageReader Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- tests/test_cuimage_reader.py | 71 ++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 tests/test_cuimage_reader.py diff --git a/tests/test_cuimage_reader.py b/tests/test_cuimage_reader.py new file mode 100644 index 0000000000..0dc50c74ba --- /dev/null +++ b/tests/test_cuimage_reader.py @@ -0,0 +1,71 @@ +import ftplib +import os +import unittest + +import numpy as np +from numpy.testing import assert_array_equal +from parameterized import parameterized +from monai.data.image_reader import CuImageReader + +filename = "test_001.tif" + + +TEST_CASE_1 = [ + filename, + {"location": (86016 // 2, 89600 // 2), "size": (1, 2), "level": 4}, + np.array([[[234], [223]], [[174], [163]], [[228], [217]]]), +] + +TEST_CASE_2 = [ + filename, + {"location": (86016 // 2, 89600 // 2), "size": (1, 2), "level": 2}, + np.array([[[220], [197]], [[165], [143]], [[220], [195]]]), +] + +TEST_CASE_3 = [ + filename, + {"location": (86016 // 2, 89600 // 2), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 2}, + np.array( + [ + [[[218, 242], [189, 198]], [[154, 173], [125, 132]], [[214, 236], [185, 194]]], + [[[190, 209], [221, 228]], [[120, 137], [149, 154]], [[180, 200], [212, 217]]], + ] + ), +] + + +class TestCuImageReader(unittest.TestCase): + @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) + def test_read_region(self, filename, patch_info, expected_img): + self.camelyon_data_download(filename) + reader = CuImageReader() + img_obj = reader.read(filename) + img = reader.get_data(img_obj, **patch_info) + + self.assertTupleEqual(img.shape, expected_img.shape) + self.assertIsNone(assert_array_equal(img, expected_img)) + + 
@parameterized.expand([TEST_CASE_3]) + def test_read_region(self, filename, patch_info, expected_img): + self.camelyon_data_download(filename) + reader = CuImageReader() + img_obj = reader.read(filename) + img = reader.get_data(img_obj, **patch_info) + print("img.shape: ", img.shape) + print("img: ", img) + self.assertTupleEqual(img.shape, expected_img.shape) + self.assertIsNone(assert_array_equal(img, expected_img)) + + def camelyon_data_download(self, filename): + if not os.path.exists(filename): + print(f"Test image [{filename}] does not exists downloading...") + path = "gigadb/pub/10.5524/100001_101000/100439/CAMELYON16/testing/images/" + ftp = ftplib.FTP("parrot.genomics.cn") + ftp.login("anonymous", "") + ftp.cwd(path) + ftp.retrbinary("RETR " + filename, open(filename, "wb").write) + ftp.quit() + + +if __name__ == "__main__": + unittest.main() From 7e7744935cf9e9e51322b7a7ccb46cccbb20fe92 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Thu, 4 Feb 2021 14:21:36 -0500 Subject: [PATCH 03/35] Add unittests for OpenSlideReader Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- tests/test_openslide_reader.py | 60 ++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 tests/test_openslide_reader.py diff --git a/tests/test_openslide_reader.py b/tests/test_openslide_reader.py new file mode 100644 index 0000000000..258a1046f7 --- /dev/null +++ b/tests/test_openslide_reader.py @@ -0,0 +1,60 @@ +import ftplib +import os +import unittest + +import numpy as np +from numpy.testing import assert_array_equal +from parameterized import parameterized +from monai.data.image_reader import OpenSlideReader + +filename = "test_001.tif" + + +TEST_CASE_1 = [ + filename, + {"location": (86016 // 2, 89600 // 2), "size": (1, 2), "level": 4}, + np.array([[[234], [223]], [[174], [163]], [[228], [217]]]), +] + +TEST_CASE_2 = [ + filename, + {"location": (86016 // 2, 89600 // 2), "size": (1, 2), 
"level": 2}, + np.array([[[220], [197]], [[165], [143]], [[220], [195]]]), +] + +TEST_CASE_3 = [ + filename, + {"location": (86016 // 2, 89600 // 2), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 2}, + np.array( + [ + [[[218, 242], [189, 198]], [[154, 173], [125, 132]], [[214, 236], [185, 194]]], + [[[190, 209], [221, 228]], [[120, 137], [149, 154]], [[180, 200], [212, 217]]], + ] + ), +] + + +class TestOpenSlideReader(unittest.TestCase): + @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) + def test_read_region(self, filename, patch_info, expected_img): + self.camelyon_data_download(filename) + reader = OpenSlideReader() + img_obj = reader.read(filename) + img = reader.get_data(img_obj, **patch_info) + + self.assertTupleEqual(img.shape, expected_img.shape) + self.assertIsNone(assert_array_equal(img, expected_img)) + + def camelyon_data_download(self, filename): + if not os.path.exists(filename): + print(f"Test image [{filename}] does not exists downloading...") + path = "gigadb/pub/10.5524/100001_101000/100439/CAMELYON16/testing/images/" + ftp = ftplib.FTP("parrot.genomics.cn") + ftp.login("anonymous", "") + ftp.cwd(path) + ftp.retrbinary("RETR " + filename, open(filename, "wb").write) + ftp.quit() + + +if __name__ == "__main__": + unittest.main() From c40b019380118c0ffd8dd64bcaebfd07a950ff40 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Thu, 4 Feb 2021 14:41:44 -0500 Subject: [PATCH 04/35] Sort imports Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- monai/data/image_reader.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py index 786ec6e71c..30932eccdb 100644 --- a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -23,13 +23,14 @@ from .utils import is_supported_format if TYPE_CHECKING: + import cuimage import itk # type: ignore import nibabel as nib + import openslide from itk import Image # 
type: ignore from nibabel.nifti1 import Nifti1Image from PIL import Image as PILImage - import cuimage - import openslide + has_itk = has_nib = has_pil = has_cux = True else: From f8b0962c718a14cf301aa5034198ceae01de0810 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Thu, 4 Feb 2021 19:52:42 -0500 Subject: [PATCH 05/35] Add correct boundaries Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- monai/data/image_reader.py | 44 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py index 30932eccdb..af61d399f6 100644 --- a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -684,12 +684,52 @@ def get_data( def _extract_region(self, img_obj, location=(0, 0), size=None, level=0, dtype=np.uint8): size = [s * (2 ** level) for s in size] - region = img_obj.read_region(location=location, size=size, level=level) - region = np.asarray(region, dtype=dtype) + location, corrected_size, x_pad, y_pad = self.correct_boundries(img_obj, location, size) + region = img_obj.read_region(location=location, size=corrected_size, level=level) + + if (corrected_size[0] == size[0]) and (corrected_size[1] == size[1]): + region = np.asarray(region, dtype=dtype) + else: + # pad with white (255, 255, 255) + region = np.ones((size[0], size[1], 3), dtype=dtype) * 255 + region[y_pad[0] : y_pad[1], x_pad[0] : x_pad[1]] = np.asarray(region, dtype=dtype) + # CuImage: (H x W x C) -> torch image: (C X H X W) region = region.transpose((2, 0, 1)) return region + def correct_boundries(self, slide, location, size): + x_min, y_min = location + region_width, region_height = size + image_width, image_height = slide.resolutions["level_dimensions"][0] + + x_max = x_min + region_width + y_max = y_min + region_height + + x_pad_min = 0 + x_pad_max = region_width + y_pad_min = 0 + y_pad_max = region_height + if x_min < 0: + x_pad_min = -x_min + x_min 
= 0 + if y_min < 0: + y_pad_min = -y_min + y_min = 0 + if x_max > image_width: + x_pad_max = region_width - (x_max - image_width) + x_max = image_width + if y_max > image_height: + y_pad_max = region_height - (y_max - image_height) + y_max = image_height + region_size = (x_max - x_min), (y_max - y_min) + return ( + (x_min, y_min), + region_size, + (x_pad_min, x_pad_max), + (y_pad_min, y_pad_max), + ) + class OpenSlideReader(ImageReader): """ From 9a3e672dbe1c0fc47151267b4c38d1c8ad337ca6 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Thu, 4 Feb 2021 20:15:59 -0500 Subject: [PATCH 06/35] Add test cases for reading patches on a grid for CuImage Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- tests/test_cuimage_reader.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tests/test_cuimage_reader.py b/tests/test_cuimage_reader.py index 0dc50c74ba..d67d474a93 100644 --- a/tests/test_cuimage_reader.py +++ b/tests/test_cuimage_reader.py @@ -1,6 +1,7 @@ import ftplib import os import unittest +from unittest.case import skipIf import numpy as np from numpy.testing import assert_array_equal @@ -33,6 +34,16 @@ ), ] +TEST_CASE_4 = [ + filename, + {"location": (86016 // 2, 89600 // 2), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 1}, + np.array( + [ + [[[198]], [[132]], [[194]]], + [[[228]], [[154]], [[217]]] + ] + ), +] class TestCuImageReader(unittest.TestCase): @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) @@ -45,14 +56,13 @@ def test_read_region(self, filename, patch_info, expected_img): self.assertTupleEqual(img.shape, expected_img.shape) self.assertIsNone(assert_array_equal(img, expected_img)) - @parameterized.expand([TEST_CASE_3]) - def test_read_region(self, filename, patch_info, expected_img): + @parameterized.expand([TEST_CASE_3, TEST_CASE_4]) + def test_read_patches(self, filename, patch_info, expected_img): self.camelyon_data_download(filename) 
reader = CuImageReader() img_obj = reader.read(filename) img = reader.get_data(img_obj, **patch_info) - print("img.shape: ", img.shape) - print("img: ", img) + print(img) self.assertTupleEqual(img.shape, expected_img.shape) self.assertIsNone(assert_array_equal(img, expected_img)) From b4633100ce6df1e8381245890b584b1ff54bdc58 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Thu, 4 Feb 2021 20:17:05 -0500 Subject: [PATCH 07/35] Add patch whole slide imaging dataset for pathology Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- monai/apps/datasets.py | 107 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) diff --git a/monai/apps/datasets.py b/monai/apps/datasets.py index d8fd815ce9..2ce87394b1 100644 --- a/monai/apps/datasets.py +++ b/monai/apps/datasets.py @@ -17,12 +17,15 @@ from monai.apps.utils import download_and_extract from monai.data import ( + Dataset, CacheDataset, + SmartCacheDataset, load_decathlon_datalist, load_decathlon_properties, partition_dataset, select_cross_validation_folds, ) +from monai.data.image_reader import CuImageReader, OpenSlideReader from monai.transforms import LoadImaged, Randomizable from monai.utils import ensure_tuple @@ -388,3 +391,107 @@ def _split_datalist(self, datalist: List[Dict]) -> List[Dict]: return select_cross_validation_folds(partitions=data, folds=folds) return _NsplitsDataset(**self.dataset_params) + + +class PatchWSIDataset(Dataset): + """ + Load whole slide images and associated class labels and create patches + """ + + def __init__(self, data, region_size, grid_size, patch_size, transform=None, image_reader_name="CuImage"): + self.image_reader_name = image_reader_name.lower() + if type(region_size) == int: + self.region_size = (region_size, region_size) + else: + self.region_size = region_size + if type(grid_size) == int: + self.grid_size = (grid_size, grid_size) + else: + self.grid_size = grid_size + self.sub_region_size = 
(self.region_size[0] / self.grid_size[0], self.region_size[1] / self.grid_size[1]) + self.patch_size = patch_size + + self.transform = transform + self.image_base_path = data[0]["image_base_path"] + self.samples = self.load_samples(data[0]["labels"]) + self.image_path_list = set([x[0] for x in self.samples]) + self.num_samples = len(self.samples) + + self.cu_image_dict = {} + + if self.image_reader_name == "cuimage": + self.image_reader = CuImageReader() + elif self.image_reader_name == "openslide": + self.image_reader = OpenSlideReader() + else: + raise ValueError('image_reader_name should be either "CuImage" or "OpenSlide"') + self._fetch_cu_images() + + def _fetch_cu_images(self): + for image_path in self.image_path_list: + self.cu_image_dict[image_path] = self.image_reader.read(image_path) + + def process_label_row(self, row): + row = row.strip("\n").split(",") + # create full image path + image_name = row[0] + ".tif" + image_path = os.path.join(self.image_base_path, image_name) + # change center locations to upper left location + location = (int(row[1]) - self.region_size[0] // 2, int(row[2]) - self.region_size[1] // 2) + # convert labels to float32 and add empty HxW channel to label + labels = tuple(int(lbl) for lbl in row[3:]) + labels = np.array(labels, dtype=np.float32)[:, np.newaxis, np.newaxis] + return image_path, location, labels + + def load_samples(self, loc_path): + with open(loc_path) as label_file: + rows = [self.process_label_row(row) for row in label_file.readlines()] + return rows + + def __len__(self): + return self.num_samples + + def __getitem__(self, index): + image_path, location, labels = self.samples[index] + images = self.image_reader.get_data( + img_obj=self.cu_image_dict[image_path], + location=location, + size=self.region_size, + grid_shape=self.grid_size, + patch_size=self.patch_size, + ) + samples = [{"image": images[i], "label": labels[i]} for i in range(labels.shape[0])] + if self.transform: + samples = self.transform(samples) + 
return samples + + +class SmartCachePatchWSIDataset(SmartCacheDataset): + """ + Add SmartCache functionality to PatchWSIDataset + """ + + def __init__( + self, + data, + region_size, + grid_size, + patch_size, + transform, + replace_rate, + cache_num, + cache_rate=1.0, + num_init_workers=None, + num_replace_workers=0, + use_openslide=False, + ): + extractor = PatchWSIDataset(data, region_size, grid_size, patch_size, use_openslide=use_openslide) + super().__init__( + data=extractor, + transform=transform, + replace_rate=replace_rate, + cache_num=cache_num, + cache_rate=cache_rate, + num_init_workers=num_init_workers, + num_replace_workers=num_replace_workers, + ) From 4c735cb534ea523f70ab02aefeb94ec26d5f52ff Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Thu, 4 Feb 2021 20:55:39 -0500 Subject: [PATCH 08/35] Add test case for read patches for OpenSlide Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- tests/test_openslide_reader.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/test_openslide_reader.py b/tests/test_openslide_reader.py index 258a1046f7..edfcf19204 100644 --- a/tests/test_openslide_reader.py +++ b/tests/test_openslide_reader.py @@ -5,7 +5,9 @@ import numpy as np from numpy.testing import assert_array_equal from parameterized import parameterized + from monai.data.image_reader import OpenSlideReader +from tests.utils import skip_if_quick filename = "test_001.tif" @@ -33,9 +35,20 @@ ), ] +TEST_CASE_4 = [ + filename, + {"location": (86016 // 2, 89600 // 2), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 1}, + np.array( + [ + [[[198]], [[132]], [[194]]], + [[[228]], [[154]], [[217]]] + ] + ), +] class TestOpenSlideReader(unittest.TestCase): @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) + @skip_if_quick def test_read_region(self, filename, patch_info, expected_img): self.camelyon_data_download(filename) reader = OpenSlideReader() @@ -45,6 +58,16 
@@ def test_read_region(self, filename, patch_info, expected_img): self.assertTupleEqual(img.shape, expected_img.shape) self.assertIsNone(assert_array_equal(img, expected_img)) + @parameterized.expand([TEST_CASE_3, TEST_CASE_4]) + @skip_if_quick + def test_read_patches(self, filename, patch_info, expected_img): + self.camelyon_data_download(filename) + reader = OpenSlideReader() + img_obj = reader.read(filename) + img = reader.get_data(img_obj, **patch_info) + self.assertTupleEqual(img.shape, expected_img.shape) + self.assertIsNone(assert_array_equal(img, expected_img)) + def camelyon_data_download(self, filename): if not os.path.exists(filename): print(f"Test image [{filename}] does not exists downloading...") From 378893c24857a4ae02bd7883f6f9215715d0ae4c Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Thu, 4 Feb 2021 20:56:58 -0500 Subject: [PATCH 09/35] flake8 and few minor changes Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- monai/apps/datasets.py | 10 +++++----- monai/data/image_reader.py | 28 +++++++--------------------- tests/test_cuimage_reader.py | 6 ++++-- 3 files changed, 16 insertions(+), 28 deletions(-) diff --git a/monai/apps/datasets.py b/monai/apps/datasets.py index 2ce87394b1..c2c6b6cbdb 100644 --- a/monai/apps/datasets.py +++ b/monai/apps/datasets.py @@ -17,8 +17,8 @@ from monai.apps.utils import download_and_extract from monai.data import ( - Dataset, CacheDataset, + Dataset, SmartCacheDataset, load_decathlon_datalist, load_decathlon_properties, @@ -398,7 +398,7 @@ class PatchWSIDataset(Dataset): Load whole slide images and associated class labels and create patches """ - def __init__(self, data, region_size, grid_size, patch_size, transform=None, image_reader_name="CuImage"): + def __init__(self, data, region_size, grid_size, patch_size, image_reader_name="CuImage", transform=None): self.image_reader_name = image_reader_name.lower() if type(region_size) == int: self.region_size = 
(region_size, region_size) @@ -414,7 +414,7 @@ def __init__(self, data, region_size, grid_size, patch_size, transform=None, ima self.transform = transform self.image_base_path = data[0]["image_base_path"] self.samples = self.load_samples(data[0]["labels"]) - self.image_path_list = set([x[0] for x in self.samples]) + self.image_path_list = set(x[0] for x in self.samples) self.num_samples = len(self.samples) self.cu_image_dict = {} @@ -483,9 +483,9 @@ def __init__( cache_rate=1.0, num_init_workers=None, num_replace_workers=0, - use_openslide=False, + image_reader_name="CuImage" ): - extractor = PatchWSIDataset(data, region_size, grid_size, patch_size, use_openslide=use_openslide) + extractor = PatchWSIDataset(data, region_size, grid_size, patch_size, image_reader_name) super().__init__( data=extractor, transform=transform, diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py index af61d399f6..f3d8132cf3 100644 --- a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -31,7 +31,6 @@ from nibabel.nifti1 import Nifti1Image from PIL import Image as PILImage - has_itk = has_nib = has_pil = has_cux = True else: itk, has_itk = optional_import("itk", allow_namespace_pkg=True) @@ -42,7 +41,7 @@ cuimage, has_cux = optional_import("cuimage") openslide, has_osl = optional_import("openslide") -__all__ = ["ImageReader", "ITKReader", "NibabelReader", "NumpyReader", "PILReader", "CuImageReader", "OpenSlide"] +__all__ = ["ImageReader", "ITKReader", "NibabelReader", "NumpyReader", "PILReader", "CuImageReader", "OpenSlideReader"] class ImageReader(ABC): @@ -646,14 +645,7 @@ def read(self, data: Union[Sequence[str], str, np.ndarray]): return img_ if len(filenames) > 1 else img_[0] def get_data( - self, - img_obj, - location=(0, 0), - size=None, - level=0, - dtype=np.uint8, - grid_shape=(1, 1), - patch_size=None + self, img_obj, location=(0, 0), size=None, level=0, dtype=np.uint8, grid_shape=(1, 1), patch_size=None ): """ Extract regions as numpy array from WSI 
image and return them. @@ -686,14 +678,14 @@ def _extract_region(self, img_obj, location=(0, 0), size=None, level=0, dtype=np size = [s * (2 ** level) for s in size] location, corrected_size, x_pad, y_pad = self.correct_boundries(img_obj, location, size) region = img_obj.read_region(location=location, size=corrected_size, level=level) - if (corrected_size[0] == size[0]) and (corrected_size[1] == size[1]): region = np.asarray(region, dtype=dtype) else: + print(img_obj) # pad with white (255, 255, 255) region = np.ones((size[0], size[1], 3), dtype=dtype) * 255 region[y_pad[0] : y_pad[1], x_pad[0] : x_pad[1]] = np.asarray(region, dtype=dtype) - + # CuImage: (H x W x C) -> torch image: (C X H X W) region = region.transpose((2, 0, 1)) return region @@ -772,14 +764,7 @@ def read(self, data: Union[Sequence[str], str, np.ndarray]): return img_ if len(filenames) > 1 else img_[0] def get_data( - self, - img_obj, - location=(0, 0), - size=None, - level=0, - dtype=np.uint8, - grid_shape=(1, 1), - patch_size=None + self, img_obj, location=(0, 0), size=None, level=0, dtype=np.uint8, grid_shape=(1, 1), patch_size=None ): """ Extract regions as numpy array from WSI image and return them. 
@@ -816,6 +801,7 @@ def _extract_region(self, img_obj, location=(0, 0), size=None, level=0, dtype=np region = region.transpose((2, 0, 1)) return region + def _extract_patches(region, grid_shape, patch_size, dtype=np.uint8): if grid_shape == (1, 1): return region @@ -837,4 +823,4 @@ def _extract_patches(region, grid_shape, patch_size, dtype=np.uint8): flat_patch_grid[idx] = region[:, x_start:x_end, y_start:y_end] idx += 1 - return flat_patch_grid \ No newline at end of file + return flat_patch_grid diff --git a/tests/test_cuimage_reader.py b/tests/test_cuimage_reader.py index d67d474a93..556aae5115 100644 --- a/tests/test_cuimage_reader.py +++ b/tests/test_cuimage_reader.py @@ -1,12 +1,13 @@ import ftplib import os import unittest -from unittest.case import skipIf import numpy as np from numpy.testing import assert_array_equal from parameterized import parameterized + from monai.data.image_reader import CuImageReader +from tests.utils import skip_if_quick filename = "test_001.tif" @@ -47,6 +48,7 @@ class TestCuImageReader(unittest.TestCase): @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) + @skip_if_quick def test_read_region(self, filename, patch_info, expected_img): self.camelyon_data_download(filename) reader = CuImageReader() @@ -57,12 +59,12 @@ def test_read_region(self, filename, patch_info, expected_img): self.assertIsNone(assert_array_equal(img, expected_img)) @parameterized.expand([TEST_CASE_3, TEST_CASE_4]) + @skip_if_quick def test_read_patches(self, filename, patch_info, expected_img): self.camelyon_data_download(filename) reader = CuImageReader() img_obj = reader.read(filename) img = reader.get_data(img_obj, **patch_info) - print(img) self.assertTupleEqual(img.shape, expected_img.shape) self.assertIsNone(assert_array_equal(img, expected_img)) From ec5261bcba5a8fa3dbb419d171e1cce83e22d71d Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Thu, 4 Feb 2021 21:06:36 -0500 Subject: [PATCH 10/35] black Signed-off-by: 
Behrooz <3968947+behxyz@users.noreply.github.com> --- monai/apps/datasets.py | 2 +- tests/test_cuimage_reader.py | 8 ++------ tests/test_openslide_reader.py | 8 ++------ 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/monai/apps/datasets.py b/monai/apps/datasets.py index c2c6b6cbdb..f8d18e5557 100644 --- a/monai/apps/datasets.py +++ b/monai/apps/datasets.py @@ -483,7 +483,7 @@ def __init__( cache_rate=1.0, num_init_workers=None, num_replace_workers=0, - image_reader_name="CuImage" + image_reader_name="CuImage", ): extractor = PatchWSIDataset(data, region_size, grid_size, patch_size, image_reader_name) super().__init__( diff --git a/tests/test_cuimage_reader.py b/tests/test_cuimage_reader.py index 556aae5115..096772c6f7 100644 --- a/tests/test_cuimage_reader.py +++ b/tests/test_cuimage_reader.py @@ -38,14 +38,10 @@ TEST_CASE_4 = [ filename, {"location": (86016 // 2, 89600 // 2), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 1}, - np.array( - [ - [[[198]], [[132]], [[194]]], - [[[228]], [[154]], [[217]]] - ] - ), + np.array([[[[198]], [[132]], [[194]]], [[[228]], [[154]], [[217]]]]), ] + class TestCuImageReader(unittest.TestCase): @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) @skip_if_quick diff --git a/tests/test_openslide_reader.py b/tests/test_openslide_reader.py index edfcf19204..a4fa507f5b 100644 --- a/tests/test_openslide_reader.py +++ b/tests/test_openslide_reader.py @@ -38,14 +38,10 @@ TEST_CASE_4 = [ filename, {"location": (86016 // 2, 89600 // 2), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 1}, - np.array( - [ - [[[198]], [[132]], [[194]]], - [[[228]], [[154]], [[217]]] - ] - ), + np.array([[[[198]], [[132]], [[194]]], [[[228]], [[154]], [[217]]]]), ] + class TestOpenSlideReader(unittest.TestCase): @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) @skip_if_quick From ce01a9b5965ab23be147fd1f8007b48ddf4b81b6 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Fri, 5 
Feb 2021 12:20:32 -0500 Subject: [PATCH 11/35] flake8 Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- monai/apps/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monai/apps/datasets.py b/monai/apps/datasets.py index f8d18e5557..454dac188f 100644 --- a/monai/apps/datasets.py +++ b/monai/apps/datasets.py @@ -414,7 +414,7 @@ def __init__(self, data, region_size, grid_size, patch_size, image_reader_name=" self.transform = transform self.image_base_path = data[0]["image_base_path"] self.samples = self.load_samples(data[0]["labels"]) - self.image_path_list = set(x[0] for x in self.samples) + self.image_path_list = {x[0] for x in self.samples} self.num_samples = len(self.samples) self.cu_image_dict = {} From 51c157886a579f798142fde73f78c89b1a687bab Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Sun, 7 Feb 2021 19:15:13 -0500 Subject: [PATCH 12/35] Add kwargs to CuImageReader and OpenSlideReader's read method Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- monai/data/image_reader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py index f3d8132cf3..10048c6d3a 100644 --- a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -626,7 +626,7 @@ def verify_suffix(self, filename: Union[Sequence[str], str]) -> bool: """ return has_cux and is_supported_format(filename, ["tif", "tiff"]) - def read(self, data: Union[Sequence[str], str, np.ndarray]): + def read(self, data: Union[Sequence[str], str, np.ndarray], **kwargs): """ Read image data from specified file or files. Note that the returned object is CuImage or list of CuImage objects. 
@@ -745,7 +745,7 @@ def verify_suffix(self, filename: Union[Sequence[str], str]) -> bool: """ return has_cux and is_supported_format(filename, ["tif", "tiff"]) - def read(self, data: Union[Sequence[str], str, np.ndarray]): + def read(self, data: Union[Sequence[str], str, np.ndarray], **kwargs): """ Read image data from specified file or files. Note that the returned object is OpenSlide or list of OpenSlide objects. From 714561a0db65f6b94c3e90012711fb7752345bb6 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Sun, 7 Feb 2021 20:07:05 -0500 Subject: [PATCH 13/35] Change the type hint from np.dtype to DTypeLike Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- monai/data/image_reader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py index 10048c6d3a..097ea65563 100644 --- a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -14,6 +14,7 @@ from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Sequence, Tuple, Union import numpy as np +from numpy.typing import DTypeLike from torch.utils.data._utils.collate import np_str_obj_array_pattern from monai.config import KeysCollection @@ -299,7 +300,7 @@ class NibabelReader(ImageReader): """ - def __init__(self, as_closest_canonical: bool = False, dtype: Optional[np.dtype] = np.float32, **kwargs): + def __init__(self, as_closest_canonical: bool = False, dtype: Optional[DTypeLike] = np.float32, **kwargs): super().__init__() self.as_closest_canonical = as_closest_canonical self.dtype = dtype From e83573d1ab8a16246ac8f74f89d92d4cbe42b5c5 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Mon, 8 Feb 2021 16:51:56 -0500 Subject: [PATCH 14/35] Fix a bug Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- monai/data/image_reader.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git 
a/monai/data/image_reader.py b/monai/data/image_reader.py index 097ea65563..37b2fac9fe 100644 --- a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -663,7 +663,7 @@ def get_data( if size is None: if location == (0, 0): # the maximum size is set to WxH - size = (img_obj.shape[1], img_obj.shape[0]) + size = (img_obj.shape[1] // (2 ** level), img_obj.shape[0]) print("Size is set to maximum size: ", size) else: print("Size need to be provided!") @@ -678,14 +678,13 @@ def get_data( def _extract_region(self, img_obj, location=(0, 0), size=None, level=0, dtype=np.uint8): size = [s * (2 ** level) for s in size] location, corrected_size, x_pad, y_pad = self.correct_boundries(img_obj, location, size) - region = img_obj.read_region(location=location, size=corrected_size, level=level) + region_raw = img_obj.read_region(location=location, size=corrected_size, level=level) if (corrected_size[0] == size[0]) and (corrected_size[1] == size[1]): - region = np.asarray(region, dtype=dtype) + region = np.asarray(region_raw, dtype=dtype) else: - print(img_obj) # pad with white (255, 255, 255) region = np.ones((size[0], size[1], 3), dtype=dtype) * 255 - region[y_pad[0] : y_pad[1], x_pad[0] : x_pad[1]] = np.asarray(region, dtype=dtype) + region[y_pad[0] : y_pad[1], x_pad[0] : x_pad[1]] = np.asarray(region_raw, dtype=dtype) # CuImage: (H x W x C) -> torch image: (C X H X W) region = region.transpose((2, 0, 1)) From 097eb19d4c678439575c3c94c6e3a4394c92b991 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Mon, 22 Feb 2021 18:36:58 -0500 Subject: [PATCH 15/35] Implement WSIReader and unittests Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- monai/apps/datasets.py | 2 +- monai/data/image_reader.py | 239 +++++++++++---------------------- tests/test_cuimage_reader.py | 6 +- tests/test_openslide_reader.py | 6 +- 4 files changed, 82 insertions(+), 171 deletions(-) diff --git a/monai/apps/datasets.py b/monai/apps/datasets.py 
index 454dac188f..db5e178877 100644 --- a/monai/apps/datasets.py +++ b/monai/apps/datasets.py @@ -25,7 +25,7 @@ partition_dataset, select_cross_validation_folds, ) -from monai.data.image_reader import CuImageReader, OpenSlideReader +from monai.data.image_reader import WSIReader from monai.transforms import LoadImaged, Randomizable from monai.utils import ensure_tuple diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py index 5892da9c0b..9ac1c7e367 100644 --- a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -12,10 +12,9 @@ import os import warnings from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Sequence, Tuple, Union +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Sequence, Tuple, Union, Type import numpy as np -from numpy.typing import DTypeLike from torch.utils.data._utils.collate import np_str_obj_array_pattern from monai.config import KeysCollection @@ -43,7 +42,7 @@ cuimage, has_cux = optional_import("cuimage") openslide, has_osl = optional_import("openslide") -__all__ = ["ImageReader", "ITKReader", "NibabelReader", "NumpyReader", "PILReader", "CuImageReader", "OpenSlideReader"] +__all__ = ["ImageReader", "ITKReader", "NibabelReader", "NumpyReader", "PILReader", "WSIReader"] class ImageReader(ABC): @@ -311,7 +310,7 @@ class NibabelReader(ImageReader): """ - def __init__(self, as_closest_canonical: bool = False, dtype: Optional[DTypeLike] = np.float32, **kwargs): + def __init__(self, as_closest_canonical: bool = False, dtype: Optional[Type] = np.float32, **kwargs): super().__init__() self.as_closest_canonical = as_closest_canonical self.dtype = dtype @@ -616,17 +615,23 @@ def _get_spatial_shape(self, img) -> np.ndarray: return np.asarray((img.width, img.height)) -class CuImageReader(ImageReader): +class WSIReader(ImageReader): """ - Extraxt 2D patches from TIFF image file(s) + Read whole slide imaging and extract patches - Args: - converter: 
additional function to convert the image data after `read()`. """ - def __init__(self, converter: Optional[Callable] = None): + def __init__(self, wsi_reader_name: str = "CuImage"): super().__init__() - self.converter = converter + self.wsi_reader_name = wsi_reader_name.lower() + if self.wsi_reader_name == "cuclaraimage": + self.wsi_reader = cuimage.CuImage + print("> CuImage is being used.") + elif self.wsi_reader_name == "openslide": + self.wsi_reader = openslide.OpenSlide + print("> OpenSlide is being used.") + else: + raise ValueError('`wsi_reader_name` should be either "CuClaraImage" or "OpenSlide"') def verify_suffix(self, filename: Union[Sequence[str], str]) -> bool: """ @@ -636,7 +641,7 @@ def verify_suffix(self, filename: Union[Sequence[str], str]) -> bool: filename: file name or a list of file names to read. if a list of files, verify all the suffixes. """ - return has_cux and is_supported_format(filename, ["tif", "tiff"]) + return is_supported_format(filename, ["tif", "tiff"]) def read(self, data: Union[Sequence[str], str, np.ndarray], **kwargs): """ @@ -647,191 +652,97 @@ def read(self, data: Union[Sequence[str], str, np.ndarray], **kwargs): data: file name or a list of file names to read. """ - img_: List[cuimage.CuImage] = [] + img_: List = [] filenames: Sequence[str] = ensure_tuple(data) for name in filenames: - img = cuimage.CuImage(name) + img = self.wsi_reader(name) img_.append(img) return img_ if len(filenames) > 1 else img_[0] def get_data( - self, img_obj, location=(0, 0), size=None, level=0, dtype=np.uint8, grid_shape=(1, 1), patch_size=None + self, + img_obj, + location: Tuple[int, int] = (0, 0), + size: Optional[Tuple[int, int]] = None, + level: int = 0, + dtype: Type = np.uint8, + grid_shape: Tuple[int, int] = (1, 1), + patch_size: Optional[int] = None, ): """ Extract regions as numpy array from WSI image and return them. 
Args: - img: a CuImage object loaded from a file, or list of CuImage objects + img: a wsi_reader object loaded from a file, or list of CuImage objects location: (x_min, y_min) tuple giving the top left pixel in the level 0 reference frame, or list of tuples (default=(0, 0)) size: (width, height) tuple giving the region size, or list of tuples (default=(wsi_width, wsi_height)) - This is the size of image at `level=0` + This is the size of image at the given level (`level`) level: the level number, or list of level numbers (default=0) """ if size is None: if location == (0, 0): # the maximum size is set to WxH - size = (img_obj.shape[1] // (2 ** level), img_obj.shape[0]) - print("Size is set to maximum size: ", size) + size = (img_obj.shape[1] // (2 ** level), img_obj.shape[0] // (2 ** level)) + print(f"Size is set to maximum size at level={level}: {size}") else: print("Size need to be provided!") return - region = self._extract_region(img_obj, location, size, level, dtype) - + region = self._extract_region(img_obj, location=location, size=size, level=level, dtype=dtype) if patch_size is None: - patch_size = region.shape[1:] - patches = _extract_patches(region, grid_shape, patch_size, dtype) - return patches - - def _extract_region(self, img_obj, location=(0, 0), size=None, level=0, dtype=np.uint8): - size = [s * (2 ** level) for s in size] - location, corrected_size, x_pad, y_pad = self.correct_boundries(img_obj, location, size) - region_raw = img_obj.read_region(location=location, size=corrected_size, level=level) - if (corrected_size[0] == size[0]) and (corrected_size[1] == size[1]): - region = np.asarray(region_raw, dtype=dtype) + patches = region else: - # pad with white (255, 255, 255) - region = np.ones((size[0], size[1], 3), dtype=dtype) * 255 - region[y_pad[0] : y_pad[1], x_pad[0] : x_pad[1]] = np.asarray(region_raw, dtype=dtype) - - # CuImage: (H x W x C) -> torch image: (C X H X W) - region = region.transpose((2, 0, 1)) - return region - - def 
correct_boundries(self, slide, location, size): - x_min, y_min = location - region_width, region_height = size - image_width, image_height = slide.resolutions["level_dimensions"][0] - - x_max = x_min + region_width - y_max = y_min + region_height - - x_pad_min = 0 - x_pad_max = region_width - y_pad_min = 0 - y_pad_max = region_height - if x_min < 0: - x_pad_min = -x_min - x_min = 0 - if y_min < 0: - y_pad_min = -y_min - y_min = 0 - if x_max > image_width: - x_pad_max = region_width - (x_max - image_width) - x_max = image_width - if y_max > image_height: - y_pad_max = region_height - (y_max - image_height) - y_max = image_height - region_size = (x_max - x_min), (y_max - y_min) - return ( - (x_min, y_min), - region_size, - (x_pad_min, x_pad_max), - (y_pad_min, y_pad_max), - ) - - -class OpenSlideReader(ImageReader): - """ - Extraxt 2D patches from TIFF image file(s) - - Args: - converter: additional function to convert the image data after `read()`. - """ - - def __init__(self, converter: Optional[Callable] = None): - super().__init__() - self.converter = converter - - def verify_suffix(self, filename: Union[Sequence[str], str]) -> bool: - """ - Verify whether the specified file or files format is supported by WSI reader. - - Args: - filename: file name or a list of file names to read. - if a list of files, verify all the suffixes. - """ - return has_cux and is_supported_format(filename, ["tif", "tiff"]) - - def read(self, data: Union[Sequence[str], str, np.ndarray], **kwargs): - """ - Read image data from specified file or files. - Note that the returned object is OpenSlide or list of OpenSlide objects. - - Args: - data: file name or a list of file names to read. 
- - """ - img_: List[openslide.OpenSlide] = [] - - filenames: Sequence[str] = ensure_tuple(data) - for name in filenames: - img = openslide.OpenSlide(name) - img_.append(img) - - return img_ if len(filenames) > 1 else img_[0] - - def get_data( - self, img_obj, location=(0, 0), size=None, level=0, dtype=np.uint8, grid_shape=(1, 1), patch_size=None - ): - """ - Extract regions as numpy array from WSI image and return them. - - Args: - img: a OpenSlide object loaded from a file, or list of OpenSlide objects - location: (x_min, y_min) tuple giving the top left pixel in the level 0 reference frame, - or list of tuples (default=(0, 0)) - size: (width, height) tuple giving the region size, or list of tuples (default=(wsi_width, wsi_height)) - This is the size of the image at the given `level` - level: the level number, or list of level numbers (default=0) - - """ - if size is None: - if location == (0, 0): - # the maximum size is set to WxH - size = (img_obj.shape[1], img_obj.shape[0]) - print("Size is set to maximum size: ", size) - else: - print("Size need to be provided!") - return - region = self._extract_region(img_obj, location, size, level, dtype) - - if patch_size is None: - patch_size = region.shape[1:] - patches = _extract_patches(region, grid_shape, patch_size, dtype) + patches = self._extract_patches(region, patch_size=(patch_size, patch_size), grid_shape=grid_shape, dtype=dtype) return patches - def _extract_region(self, img_obj, location=(0, 0), size=None, level=0, dtype=np.uint8): - region = img_obj.read_region(location=location, level=level, size=size) - region = region.convert("RGB") + def _extract_region( + self, + img_obj, + location: Tuple[int, int] = (0, 0), + size: Optional[Tuple[int, int]] = None, + level: int = 0, + dtype: Type = np.uint8, + ): + region = img_obj.read_region(location=location, size=size, level=level) + if self.wsi_reader_name == "openslide": + region = region.convert("RGB") + # convert to numpy region = np.asarray(region, 
dtype=dtype) - # OpenSlide: (H x W x C) -> torch image: (C X H X W) + # cuCalaraImage/OpenSlide: (H x W x C) -> torch image: (C X H X W) region = region.transpose((2, 0, 1)) return region + def _extract_patches( + self, + region: np.ndarray, + grid_shape: Tuple[int, int] = (1, 1), + patch_size: Optional[Tuple[int, int]] = None, + dtype: Type = np.uint8, + ): + if patch_size is None and grid_shape == (1, 1): + return region -def _extract_patches(region, grid_shape, patch_size, dtype=np.uint8): - if grid_shape == (1, 1): - return region + n_patches = np.prod(grid_shape) + region_size = region.shape[1:] - n_patches = np.prod(grid_shape) - region_size = region.shape[1:] - - # split the region into patches on the grid and center crop them to patch size - flat_patch_grid = np.zeros((n_patches, 3, patch_size, patch_size), dtype=dtype) - start_points = [ - np.round(region_size[i] * (0.5 + np.arange(grid_shape[i])) / grid_shape[i] - patch_size / 2).astype(int) - for i in range(2) - ] - idx = 0 - for y_start in start_points[1]: - for x_start in start_points[0]: - x_end = x_start + patch_size - y_end = y_start + patch_size - flat_patch_grid[idx] = region[:, x_start:x_end, y_start:y_end] - idx += 1 - - return flat_patch_grid + if patch_size is None: + patch_size = (region_size[0] // grid_shape[0], region_size[1] // grid_shape[1]) + + # split the region into patches on the grid and center crop them to patch size + flat_patch_grid = np.zeros((n_patches, 3, patch_size[0], patch_size[1]), dtype=dtype) + start_points = [ + np.round(region_size[i] * (0.5 + np.arange(grid_shape[i])) / grid_shape[i] - patch_size[i] / 2).astype(int) + for i in range(2) + ] + idx = 0 + for y_start in start_points[1]: + for x_start in start_points[0]: + x_end = x_start + patch_size[0] + y_end = y_start + patch_size[1] + flat_patch_grid[idx] = region[:, x_start:x_end, y_start:y_end] + idx += 1 + + return flat_patch_grid diff --git a/tests/test_cuimage_reader.py b/tests/test_cuimage_reader.py index 
096772c6f7..23a48aa3aa 100644 --- a/tests/test_cuimage_reader.py +++ b/tests/test_cuimage_reader.py @@ -6,7 +6,7 @@ from numpy.testing import assert_array_equal from parameterized import parameterized -from monai.data.image_reader import CuImageReader +from monai.data.image_reader import WSIReader from tests.utils import skip_if_quick filename = "test_001.tif" @@ -47,7 +47,7 @@ class TestCuImageReader(unittest.TestCase): @skip_if_quick def test_read_region(self, filename, patch_info, expected_img): self.camelyon_data_download(filename) - reader = CuImageReader() + reader = WSIReader("CuClaraImage") img_obj = reader.read(filename) img = reader.get_data(img_obj, **patch_info) @@ -58,7 +58,7 @@ def test_read_region(self, filename, patch_info, expected_img): @skip_if_quick def test_read_patches(self, filename, patch_info, expected_img): self.camelyon_data_download(filename) - reader = CuImageReader() + reader = WSIReader("CuClaraImage") img_obj = reader.read(filename) img = reader.get_data(img_obj, **patch_info) self.assertTupleEqual(img.shape, expected_img.shape) diff --git a/tests/test_openslide_reader.py b/tests/test_openslide_reader.py index a4fa507f5b..48e05eefcd 100644 --- a/tests/test_openslide_reader.py +++ b/tests/test_openslide_reader.py @@ -6,7 +6,7 @@ from numpy.testing import assert_array_equal from parameterized import parameterized -from monai.data.image_reader import OpenSlideReader +from monai.data.image_reader import WSIReader from tests.utils import skip_if_quick filename = "test_001.tif" @@ -47,7 +47,7 @@ class TestOpenSlideReader(unittest.TestCase): @skip_if_quick def test_read_region(self, filename, patch_info, expected_img): self.camelyon_data_download(filename) - reader = OpenSlideReader() + reader = WSIReader("openslide") img_obj = reader.read(filename) img = reader.get_data(img_obj, **patch_info) @@ -58,7 +58,7 @@ def test_read_region(self, filename, patch_info, expected_img): @skip_if_quick def test_read_patches(self, filename, patch_info, 
expected_img): self.camelyon_data_download(filename) - reader = OpenSlideReader() + reader = WSIReader("openslide") img_obj = reader.read(filename) img = reader.get_data(img_obj, **patch_info) self.assertTupleEqual(img.shape, expected_img.shape) From 356e0d4a12b82b38fddedb251391bbadf264d0b3 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Mon, 22 Feb 2021 18:44:59 -0500 Subject: [PATCH 16/35] Minor updates Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- monai/data/image_reader.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py index 9ac1c7e367..a4dd65f30e 100644 --- a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -12,12 +12,12 @@ import os import warnings from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Sequence, Tuple, Union, Type +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Sequence, Tuple, Union import numpy as np from torch.utils.data._utils.collate import np_str_obj_array_pattern -from monai.config import KeysCollection +from monai.config import DtypeLike, KeysCollection from monai.data.utils import correct_nifti_header_if_necessary from monai.utils import ensure_tuple, optional_import @@ -32,7 +32,7 @@ from nibabel.nifti1 import Nifti1Image from PIL import Image as PILImage - has_itk = has_nib = has_pil = has_cux = True + has_itk = has_nib = has_pil = has_cux = has_osl =True else: itk, has_itk = optional_import("itk", allow_namespace_pkg=True) Image, _ = optional_import("itk", allow_namespace_pkg=True, name="Image") @@ -310,7 +310,7 @@ class NibabelReader(ImageReader): """ - def __init__(self, as_closest_canonical: bool = False, dtype: Optional[Type] = np.float32, **kwargs): + def __init__(self, as_closest_canonical: bool = False, dtype: Optional[DtypeLike] = np.float32, **kwargs): super().__init__() 
self.as_closest_canonical = as_closest_canonical self.dtype = dtype @@ -667,7 +667,7 @@ def get_data( location: Tuple[int, int] = (0, 0), size: Optional[Tuple[int, int]] = None, level: int = 0, - dtype: Type = np.uint8, + dtype: DtypeLike = np.uint8, grid_shape: Tuple[int, int] = (1, 1), patch_size: Optional[int] = None, ): @@ -704,7 +704,7 @@ def _extract_region( location: Tuple[int, int] = (0, 0), size: Optional[Tuple[int, int]] = None, level: int = 0, - dtype: Type = np.uint8, + dtype: DtypeLike = np.uint8, ): region = img_obj.read_region(location=location, size=size, level=level) if self.wsi_reader_name == "openslide": @@ -720,7 +720,7 @@ def _extract_patches( region: np.ndarray, grid_shape: Tuple[int, int] = (1, 1), patch_size: Optional[Tuple[int, int]] = None, - dtype: Type = np.uint8, + dtype: DtypeLike = np.uint8, ): if patch_size is None and grid_shape == (1, 1): return region From 27a04f6a4fd554c1d3d41d77bb0ac91fe5206d24 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Mon, 22 Feb 2021 20:12:35 -0500 Subject: [PATCH 17/35] Fix few typing issues Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- monai/data/image_reader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py index a4dd65f30e..2cd62e03d8 100644 --- a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -256,7 +256,7 @@ def _get_affine(self, img) -> np.ndarray: origin = np.asarray(img.GetOrigin()) direction = np.asarray(direction) - affine = np.eye(direction.shape[0] + 1) + affine: np.ndarray = np.eye(direction.shape[0] + 1) affine[(slice(-1), slice(-1))] = direction @ np.diag(spacing) affine[(slice(-1), -1)] = origin return affine @@ -725,7 +725,7 @@ def _extract_patches( if patch_size is None and grid_shape == (1, 1): return region - n_patches = np.prod(grid_shape) + n_patches = grid_shape[0] * grid_shape[1] region_size = region.shape[1:] if patch_size 
is None: From 9f09e49dce8eedde3530ba235fcffe8ea7476d85 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Tue, 23 Feb 2021 12:09:17 -0500 Subject: [PATCH 18/35] Revert datasets Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- monai/apps/datasets.py | 107 ----------------------------------------- 1 file changed, 107 deletions(-) diff --git a/monai/apps/datasets.py b/monai/apps/datasets.py index db5e178877..d8fd815ce9 100644 --- a/monai/apps/datasets.py +++ b/monai/apps/datasets.py @@ -18,14 +18,11 @@ from monai.apps.utils import download_and_extract from monai.data import ( CacheDataset, - Dataset, - SmartCacheDataset, load_decathlon_datalist, load_decathlon_properties, partition_dataset, select_cross_validation_folds, ) -from monai.data.image_reader import WSIReader from monai.transforms import LoadImaged, Randomizable from monai.utils import ensure_tuple @@ -391,107 +388,3 @@ def _split_datalist(self, datalist: List[Dict]) -> List[Dict]: return select_cross_validation_folds(partitions=data, folds=folds) return _NsplitsDataset(**self.dataset_params) - - -class PatchWSIDataset(Dataset): - """ - Load whole slide images and associated class labels and create patches - """ - - def __init__(self, data, region_size, grid_size, patch_size, image_reader_name="CuImage", transform=None): - self.image_reader_name = image_reader_name.lower() - if type(region_size) == int: - self.region_size = (region_size, region_size) - else: - self.region_size = region_size - if type(grid_size) == int: - self.grid_size = (grid_size, grid_size) - else: - self.grid_size = grid_size - self.sub_region_size = (self.region_size[0] / self.grid_size[0], self.region_size[1] / self.grid_size[1]) - self.patch_size = patch_size - - self.transform = transform - self.image_base_path = data[0]["image_base_path"] - self.samples = self.load_samples(data[0]["labels"]) - self.image_path_list = {x[0] for x in self.samples} - self.num_samples = 
len(self.samples) - - self.cu_image_dict = {} - - if self.image_reader_name == "cuimage": - self.image_reader = CuImageReader() - elif self.image_reader_name == "openslide": - self.image_reader = OpenSlideReader() - else: - raise ValueError('image_reader_name should be either "CuImage" or "OpenSlide"') - self._fetch_cu_images() - - def _fetch_cu_images(self): - for image_path in self.image_path_list: - self.cu_image_dict[image_path] = self.image_reader.read(image_path) - - def process_label_row(self, row): - row = row.strip("\n").split(",") - # create full image path - image_name = row[0] + ".tif" - image_path = os.path.join(self.image_base_path, image_name) - # change center locations to upper left location - location = (int(row[1]) - self.region_size[0] // 2, int(row[2]) - self.region_size[1] // 2) - # convert labels to float32 and add empty HxW channel to label - labels = tuple(int(lbl) for lbl in row[3:]) - labels = np.array(labels, dtype=np.float32)[:, np.newaxis, np.newaxis] - return image_path, location, labels - - def load_samples(self, loc_path): - with open(loc_path) as label_file: - rows = [self.process_label_row(row) for row in label_file.readlines()] - return rows - - def __len__(self): - return self.num_samples - - def __getitem__(self, index): - image_path, location, labels = self.samples[index] - images = self.image_reader.get_data( - img_obj=self.cu_image_dict[image_path], - location=location, - size=self.region_size, - grid_shape=self.grid_size, - patch_size=self.patch_size, - ) - samples = [{"image": images[i], "label": labels[i]} for i in range(labels.shape[0])] - if self.transform: - samples = self.transform(samples) - return samples - - -class SmartCachePatchWSIDataset(SmartCacheDataset): - """ - Add SmartCache functionality to PatchWSIDataset - """ - - def __init__( - self, - data, - region_size, - grid_size, - patch_size, - transform, - replace_rate, - cache_num, - cache_rate=1.0, - num_init_workers=None, - num_replace_workers=0, - 
image_reader_name="CuImage", - ): - extractor = PatchWSIDataset(data, region_size, grid_size, patch_size, image_reader_name) - super().__init__( - data=extractor, - transform=transform, - replace_rate=replace_rate, - cache_num=cache_num, - cache_rate=cache_rate, - num_init_workers=num_init_workers, - num_replace_workers=num_replace_workers, - ) From 4b9734f5172d2a422dd679f6b43bea0354a21826 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Tue, 23 Feb 2021 13:49:09 -0500 Subject: [PATCH 19/35] Add shape property to openslide image object Reverse size to be compatible with output size (hxw) Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- monai/data/image_reader.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py index 2cd62e03d8..df6544c271 100644 --- a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -32,7 +32,7 @@ from nibabel.nifti1 import Nifti1Image from PIL import Image as PILImage - has_itk = has_nib = has_pil = has_cux = has_osl =True + has_itk = has_nib = has_pil = has_cux = has_osl = True else: itk, has_itk = optional_import("itk", allow_namespace_pkg=True) Image, _ = optional_import("itk", allow_namespace_pkg=True, name="Image") @@ -657,6 +657,8 @@ def read(self, data: Union[Sequence[str], str, np.ndarray], **kwargs): filenames: Sequence[str] = ensure_tuple(data) for name in filenames: img = self.wsi_reader(name) + if self.wsi_reader_name == "openslide": + img.shape = (img.dimensions[1], img.dimensions[0], 3) img_.append(img) return img_ if len(filenames) > 1 else img_[0] @@ -678,7 +680,7 @@ def get_data( img: a wsi_reader object loaded from a file, or list of CuImage objects location: (x_min, y_min) tuple giving the top left pixel in the level 0 reference frame, or list of tuples (default=(0, 0)) - size: (width, height) tuple giving the region size, or list of tuples (default=(wsi_width, 
wsi_height)) + size: (height, width) tuple giving the region size, or list of tuples (default=(wsi_width, wsi_height)) This is the size of image at the given level (`level`) level: the level number, or list of level numbers (default=0) @@ -686,8 +688,8 @@ def get_data( if size is None: if location == (0, 0): # the maximum size is set to WxH - size = (img_obj.shape[1] // (2 ** level), img_obj.shape[0] // (2 ** level)) - print(f"Size is set to maximum size at level={level}: {size}") + size = (img_obj.shape[0] // (2 ** level), img_obj.shape[1] // (2 ** level)) + print(f"Reading the whole image at level={level} with shape={size}") else: print("Size need to be provided!") return @@ -695,7 +697,9 @@ def get_data( if patch_size is None: patches = region else: - patches = self._extract_patches(region, patch_size=(patch_size, patch_size), grid_shape=grid_shape, dtype=dtype) + patches = self._extract_patches( + region, patch_size=(patch_size, patch_size), grid_shape=grid_shape, dtype=dtype + ) return patches def _extract_region( @@ -706,6 +710,8 @@ def _extract_region( level: int = 0, dtype: DtypeLike = np.uint8, ): + # convert to read_region size, which is (width, height) + size = size[::-1] region = img_obj.read_region(location=location, size=size, level=level) if self.wsi_reader_name == "openslide": region = region.convert("RGB") From 563314feb8a85a9ecd938dac5c60b7b078765b03 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Tue, 23 Feb 2021 13:52:58 -0500 Subject: [PATCH 20/35] Add untittest for loading the whole image Reverse the size accroding to the WSIReader Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- tests/test_cuimage_reader.py | 17 ++++++++++++++--- tests/test_openslide_reader.py | 21 ++++++++++++++++----- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/tests/test_cuimage_reader.py b/tests/test_cuimage_reader.py index 23a48aa3aa..7166bdbfc9 100644 --- a/tests/test_cuimage_reader.py +++ 
b/tests/test_cuimage_reader.py @@ -11,16 +11,17 @@ filename = "test_001.tif" +TEST_CASE_0 = [filename, (3, 53760, 77824)] TEST_CASE_1 = [ filename, - {"location": (86016 // 2, 89600 // 2), "size": (1, 2), "level": 4}, + {"location": (86016 // 2, 89600 // 2), "size": (2, 1), "level": 4}, np.array([[[234], [223]], [[174], [163]], [[228], [217]]]), ] TEST_CASE_2 = [ filename, - {"location": (86016 // 2, 89600 // 2), "size": (1, 2), "level": 2}, + {"location": (86016 // 2, 89600 // 2), "size": (2, 1), "level": 2}, np.array([[[220], [197]], [[165], [143]], [[220], [195]]]), ] @@ -43,6 +44,16 @@ class TestCuImageReader(unittest.TestCase): + @parameterized.expand([TEST_CASE_0]) + @skip_if_quick + def test_read_whole_image(self, filename, expected_shape): + self.camelyon_data_download(filename) + reader = WSIReader("CuClaraImage") + img_obj = reader.read(filename) + img = reader.get_data(img_obj) + + self.assertTupleEqual(img.shape, expected_shape) + @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) @skip_if_quick def test_read_region(self, filename, patch_info, expected_img): @@ -66,7 +77,7 @@ def test_read_patches(self, filename, patch_info, expected_img): def camelyon_data_download(self, filename): if not os.path.exists(filename): - print(f"Test image [{filename}] does not exists downloading...") + print(f"Test image [{filename}] does not exist. 
Downloading...") path = "gigadb/pub/10.5524/100001_101000/100439/CAMELYON16/testing/images/" ftp = ftplib.FTP("parrot.genomics.cn") ftp.login("anonymous", "") diff --git a/tests/test_openslide_reader.py b/tests/test_openslide_reader.py index 48e05eefcd..f0cfc1d811 100644 --- a/tests/test_openslide_reader.py +++ b/tests/test_openslide_reader.py @@ -11,16 +11,17 @@ filename = "test_001.tif" +TEST_CASE_0 = [filename, (3, 53760, 77824)] TEST_CASE_1 = [ filename, - {"location": (86016 // 2, 89600 // 2), "size": (1, 2), "level": 4}, + {"location": (86016 // 2, 89600 // 2), "size": (2, 1), "level": 4}, np.array([[[234], [223]], [[174], [163]], [[228], [217]]]), ] TEST_CASE_2 = [ filename, - {"location": (86016 // 2, 89600 // 2), "size": (1, 2), "level": 2}, + {"location": (86016 // 2, 89600 // 2), "size": (2, 1), "level": 2}, np.array([[[220], [197]], [[165], [143]], [[220], [195]]]), ] @@ -43,11 +44,21 @@ class TestOpenSlideReader(unittest.TestCase): + @parameterized.expand([TEST_CASE_0]) + @skip_if_quick + def test_read_whole_image(self, filename, expected_shape): + self.camelyon_data_download(filename) + reader = WSIReader("OpenSlide") + img_obj = reader.read(filename) + img = reader.get_data(img_obj) + + self.assertTupleEqual(img.shape, expected_shape) + @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) @skip_if_quick def test_read_region(self, filename, patch_info, expected_img): self.camelyon_data_download(filename) - reader = WSIReader("openslide") + reader = WSIReader("OpenSlide") img_obj = reader.read(filename) img = reader.get_data(img_obj, **patch_info) @@ -58,7 +69,7 @@ def test_read_region(self, filename, patch_info, expected_img): @skip_if_quick def test_read_patches(self, filename, patch_info, expected_img): self.camelyon_data_download(filename) - reader = WSIReader("openslide") + reader = WSIReader("OpenSlide") img_obj = reader.read(filename) img = reader.get_data(img_obj, **patch_info) self.assertTupleEqual(img.shape, expected_img.shape) @@ -66,7 +77,7 @@ 
def test_read_patches(self, filename, patch_info, expected_img): def camelyon_data_download(self, filename): if not os.path.exists(filename): - print(f"Test image [{filename}] does not exists downloading...") + print(f"Test image [{filename}] does not exist. Downloading...") path = "gigadb/pub/10.5524/100001_101000/100439/CAMELYON16/testing/images/" ftp = ftplib.FTP("parrot.genomics.cn") ftp.login("anonymous", "") From eb9655da4132ba696f40e35985890c1039eec7a6 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Tue, 23 Feb 2021 13:56:51 -0500 Subject: [PATCH 21/35] Update the whole image size Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- tests/test_cuimage_reader.py | 2 +- tests/test_openslide_reader.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_cuimage_reader.py b/tests/test_cuimage_reader.py index 7166bdbfc9..74679f53d3 100644 --- a/tests/test_cuimage_reader.py +++ b/tests/test_cuimage_reader.py @@ -11,7 +11,7 @@ filename = "test_001.tif" -TEST_CASE_0 = [filename, (3, 53760, 77824)] +TEST_CASE_0 = [filename, (3, 89600, 86016)] TEST_CASE_1 = [ filename, diff --git a/tests/test_openslide_reader.py b/tests/test_openslide_reader.py index f0cfc1d811..53f40644e7 100644 --- a/tests/test_openslide_reader.py +++ b/tests/test_openslide_reader.py @@ -11,7 +11,7 @@ filename = "test_001.tif" -TEST_CASE_0 = [filename, (3, 53760, 77824)] +TEST_CASE_0 = [filename, (3, 89600, 86016)] TEST_CASE_1 = [ filename, From 71f9af409be003fdd9c896e786f51d6b9a3a34df Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Tue, 23 Feb 2021 14:13:55 -0500 Subject: [PATCH 22/35] Remove optional size Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- monai/data/image_reader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py index df6544c271..ee74b804d9 100644 --- 
a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -400,7 +400,7 @@ def _get_affine(self, img) -> np.ndarray: img: a Nibabel image object loaded from a image file. """ - return img.affine.copy() + return np.array(img.affine, copy=True) def _get_spatial_shape(self, img) -> np.ndarray: """ @@ -705,8 +705,8 @@ def get_data( def _extract_region( self, img_obj, + size: Tuple[int, int], location: Tuple[int, int] = (0, 0), - size: Optional[Tuple[int, int]] = None, level: int = 0, dtype: DtypeLike = np.uint8, ): From 3b9809607642688c4665ba6511a53f402265687b Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Tue, 23 Feb 2021 14:17:14 -0500 Subject: [PATCH 23/35] Remove optional dtype Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- monai/data/image_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py index ee74b804d9..a63f1fe5eb 100644 --- a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -310,7 +310,7 @@ class NibabelReader(ImageReader): """ - def __init__(self, as_closest_canonical: bool = False, dtype: Optional[DtypeLike] = np.float32, **kwargs): + def __init__(self, as_closest_canonical: bool = False, dtype: DtypeLike = np.float32, **kwargs): super().__init__() self.as_closest_canonical = as_closest_canonical self.dtype = dtype From 0076988250abd0dec62d15cccd24871d289d0ded Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Tue, 23 Feb 2021 14:21:30 -0500 Subject: [PATCH 24/35] Remove _get_spatial_shape return type Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- monai/data/image_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py index a63f1fe5eb..c0a06d6a1c 100644 --- a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -273,7 +273,7 @@ def 
_get_spatial_shape(self, img) -> np.ndarray: shape.reverse() return np.asarray(shape) - def _get_array_data(self, img) -> np.ndarray: + def _get_array_data(self, img): """ Get the raw array data of the image, converted to Numpy array. From 291846fd081a97c686642240d7d368ef9609c663 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Wed, 24 Feb 2021 10:23:07 -0500 Subject: [PATCH 25/35] Reverse the orders of dimensions of `location` to be compatible with image shape Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- monai/data/image_reader.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py index c0a06d6a1c..9fb97572cf 100644 --- a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -624,12 +624,12 @@ class WSIReader(ImageReader): def __init__(self, wsi_reader_name: str = "CuImage"): super().__init__() self.wsi_reader_name = wsi_reader_name.lower() - if self.wsi_reader_name == "cuclaraimage": - self.wsi_reader = cuimage.CuImage - print("> CuImage is being used.") - elif self.wsi_reader_name == "openslide": + if self.wsi_reader_name == "openslide": self.wsi_reader = openslide.OpenSlide print("> OpenSlide is being used.") + elif self.wsi_reader_name == "cuclaraimage": + self.wsi_reader = cuimage.CuImage + print("> CuImage is being used.") else: raise ValueError('`wsi_reader_name` should be either "CuClaraImage" or "OpenSlide"') @@ -710,8 +710,9 @@ def _extract_region( level: int = 0, dtype: DtypeLike = np.uint8, ): - # convert to read_region size, which is (width, height) + # reverse the order of dimensions for size and location to be compatible with image shape size = size[::-1] + location = location[::-1] region = img_obj.read_region(location=location, size=size, level=level) if self.wsi_reader_name == "openslide": region = region.convert("RGB") From 3ac764791e867fce71036601357db2549e6f3a84 Mon Sep 17 00:00:00 2001 From: 
Behrooz <3968947+behxyz@users.noreply.github.com> Date: Wed, 24 Feb 2021 10:27:38 -0500 Subject: [PATCH 26/35] Change test cases to use smaller image and revese location's dimensions Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- tests/test_cuimage_reader.py | 24 +++++++++++------------- tests/test_openslide_reader.py | 23 +++++++++++------------ 2 files changed, 22 insertions(+), 25 deletions(-) diff --git a/tests/test_cuimage_reader.py b/tests/test_cuimage_reader.py index 74679f53d3..1e19c5da21 100644 --- a/tests/test_cuimage_reader.py +++ b/tests/test_cuimage_reader.py @@ -9,37 +9,37 @@ from monai.data.image_reader import WSIReader from tests.utils import skip_if_quick -filename = "test_001.tif" +filename = "test_065.tif" -TEST_CASE_0 = [filename, (3, 89600, 86016)] +TEST_CASE_0 = [filename, (3, 53760, 77824)] TEST_CASE_1 = [ filename, - {"location": (86016 // 2, 89600 // 2), "size": (2, 1), "level": 4}, - np.array([[[234], [223]], [[174], [163]], [[228], [217]]]), + {"location": (53760 // 2, 77824 // 2), "size": (2, 1), "level": 4}, + np.array([[[218], [237]], [[211], [230]], [[219], [237]]]), ] TEST_CASE_2 = [ filename, - {"location": (86016 // 2, 89600 // 2), "size": (2, 1), "level": 2}, - np.array([[[220], [197]], [[165], [143]], [[220], [195]]]), + {"location": (53760 // 2, 77824 // 2), "size": (2, 1), "level": 2}, + np.array([[[229], [226]], [[218], [221]], [[232], [228]]]), ] TEST_CASE_3 = [ filename, - {"location": (86016 // 2, 89600 // 2), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 2}, + {"location": (53760 // 2, 77824 // 2), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 2}, np.array( [ - [[[218, 242], [189, 198]], [[154, 173], [125, 132]], [[214, 236], [185, 194]]], - [[[190, 209], [221, 228]], [[120, 137], [149, 154]], [[180, 200], [212, 217]]], + [[[227, 228], [227, 228]], [[226, 228], [226, 228]], [[231, 228], [231, 230]]], + [[[224, 224], [224, 226]], [[227, 228], [227, 227]], [[232, 
231], [232, 231]]], ] ), ] TEST_CASE_4 = [ filename, - {"location": (86016 // 2, 89600 // 2), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 1}, - np.array([[[[198]], [[132]], [[194]]], [[[228]], [[154]], [[217]]]]), + {"location": (53760 // 2, 77824 // 2), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 1}, + np.array([[[[228]], [[228]], [[230]]], [[[226]], [[227]], [[231]]]]), ] @@ -51,7 +51,6 @@ def test_read_whole_image(self, filename, expected_shape): reader = WSIReader("CuClaraImage") img_obj = reader.read(filename) img = reader.get_data(img_obj) - self.assertTupleEqual(img.shape, expected_shape) @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) @@ -61,7 +60,6 @@ def test_read_region(self, filename, patch_info, expected_img): reader = WSIReader("CuClaraImage") img_obj = reader.read(filename) img = reader.get_data(img_obj, **patch_info) - self.assertTupleEqual(img.shape, expected_img.shape) self.assertIsNone(assert_array_equal(img, expected_img)) diff --git a/tests/test_openslide_reader.py b/tests/test_openslide_reader.py index 53f40644e7..10b5945272 100644 --- a/tests/test_openslide_reader.py +++ b/tests/test_openslide_reader.py @@ -9,37 +9,37 @@ from monai.data.image_reader import WSIReader from tests.utils import skip_if_quick -filename = "test_001.tif" +filename = "test_065.tif" -TEST_CASE_0 = [filename, (3, 89600, 86016)] +TEST_CASE_0 = [filename, (3, 53760, 77824)] TEST_CASE_1 = [ filename, - {"location": (86016 // 2, 89600 // 2), "size": (2, 1), "level": 4}, - np.array([[[234], [223]], [[174], [163]], [[228], [217]]]), + {"location": (53760 // 2, 77824 // 2), "size": (2, 1), "level": 4}, + np.array([[[218], [237]], [[211], [230]], [[219], [237]]]), ] TEST_CASE_2 = [ filename, - {"location": (86016 // 2, 89600 // 2), "size": (2, 1), "level": 2}, - np.array([[[220], [197]], [[165], [143]], [[220], [195]]]), + {"location": (53760 // 2, 77824 // 2), "size": (2, 1), "level": 2}, + np.array([[[229], [226]], [[218], [221]], 
[[232], [228]]]), ] TEST_CASE_3 = [ filename, - {"location": (86016 // 2, 89600 // 2), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 2}, + {"location": (53760 // 2, 77824 // 2), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 2}, np.array( [ - [[[218, 242], [189, 198]], [[154, 173], [125, 132]], [[214, 236], [185, 194]]], - [[[190, 209], [221, 228]], [[120, 137], [149, 154]], [[180, 200], [212, 217]]], + [[[227, 228], [227, 228]], [[226, 228], [226, 228]], [[231, 228], [231, 230]]], + [[[224, 224], [224, 226]], [[227, 228], [227, 227]], [[232, 231], [232, 231]]], ] ), ] TEST_CASE_4 = [ filename, - {"location": (86016 // 2, 89600 // 2), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 1}, - np.array([[[[198]], [[132]], [[194]]], [[[228]], [[154]], [[217]]]]), + {"location": (53760 // 2, 77824 // 2), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 1}, + np.array([[[[228]], [[228]], [[230]]], [[[226]], [[227]], [[231]]]]), ] @@ -61,7 +61,6 @@ def test_read_region(self, filename, patch_info, expected_img): reader = WSIReader("OpenSlide") img_obj = reader.read(filename) img = reader.get_data(img_obj, **patch_info) - self.assertTupleEqual(img.shape, expected_img.shape) self.assertIsNone(assert_array_equal(img, expected_img)) From b851859fe201ea8ce0b3c6f90e71e300ede2a8a8 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Fri, 26 Feb 2021 10:56:58 -0500 Subject: [PATCH 27/35] Replace the test TIFF and some upgrades Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- tests/test_cuimage_reader.py | 97 +++++++++++++++++++++------------ tests/test_openslide_reader.py | 98 ++++++++++++++++++++++------------ 2 files changed, 128 insertions(+), 67 deletions(-) diff --git a/tests/test_cuimage_reader.py b/tests/test_cuimage_reader.py index 1e19c5da21..c92c10be85 100644 --- a/tests/test_cuimage_reader.py +++ b/tests/test_cuimage_reader.py @@ -1,62 +1,91 @@ -import 
ftplib import os import unittest +from typing import TypedDict +from unittest import skipUnless +from urllib import request import numpy as np from numpy.testing import assert_array_equal from parameterized import parameterized from monai.data.image_reader import WSIReader -from tests.utils import skip_if_quick +from monai.utils import optional_import -filename = "test_065.tif" +_, has_cui = optional_import("cuimage") -TEST_CASE_0 = [filename, (3, 53760, 77824)] + +class SampleImage(TypedDict): + name: str + url: str + height: int + width: int + + +FILE_INFO: SampleImage = { + "name": "CMU-1.tiff", + "url": "http://openslide.cs.cmu.edu/download/openslide-testdata/Generic-TIFF/CMU-1.tiff", + "height": 32914, + "width": 46000, +} + +TEST_CASE_0 = [FILE_INFO, (3, FILE_INFO["height"], FILE_INFO["width"])] TEST_CASE_1 = [ - filename, - {"location": (53760 // 2, 77824 // 2), "size": (2, 1), "level": 4}, - np.array([[[218], [237]], [[211], [230]], [[219], [237]]]), + FILE_INFO, + {"location": (FILE_INFO["height"] // 2, FILE_INFO["width"] // 2), "size": (2, 1), "level": 4}, + np.array([[[246], [246]], [[246], [246]], [[244], [244]]]), ] TEST_CASE_2 = [ - filename, - {"location": (53760 // 2, 77824 // 2), "size": (2, 1), "level": 2}, - np.array([[[229], [226]], [[218], [221]], [[232], [228]]]), + FILE_INFO, + {"location": (FILE_INFO["height"] // 2, FILE_INFO["width"] // 2), "size": (2, 1), "level": 2}, + np.array([[[246], [246]], [[246], [246]], [[246], [246]]]), ] TEST_CASE_3 = [ - filename, - {"location": (53760 // 2, 77824 // 2), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 2}, + FILE_INFO, + { + "location": (FILE_INFO["height"] // 2, FILE_INFO["width"] // 2), + "size": (8, 8), + "level": 2, + "grid_shape": (2, 1), + "patch_size": 2, + }, np.array( [ - [[[227, 228], [227, 228]], [[226, 228], [226, 228]], [[231, 228], [231, 230]]], - [[[224, 224], [224, 226]], [[227, 228], [227, 227]], [[232, 231], [232, 231]]], + [[[246, 246], [246, 246]], [[246, 246], 
[246, 246]], [[246, 246], [246, 246]]], + [[[246, 246], [246, 246]], [[246, 246], [246, 246]], [[246, 246], [246, 246]]], ] ), ] TEST_CASE_4 = [ - filename, - {"location": (53760 // 2, 77824 // 2), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 1}, - np.array([[[[228]], [[228]], [[230]]], [[[226]], [[227]], [[231]]]]), + FILE_INFO, + { + "location": (FILE_INFO["height"] // 2, FILE_INFO["width"] // 2), + "size": (8, 8), + "level": 2, + "grid_shape": (2, 1), + "patch_size": 1, + }, + np.array([[[[246]], [[246]], [[246]]], [[[246]], [[246]], [[246]]]]), ] class TestCuImageReader(unittest.TestCase): @parameterized.expand([TEST_CASE_0]) - @skip_if_quick - def test_read_whole_image(self, filename, expected_shape): - self.camelyon_data_download(filename) + @skipUnless(has_cui, "Requires CuClaraImage") + def test_read_whole_image(self, file_info, expected_shape): + filename = self.camelyon_data_download(file_info) reader = WSIReader("CuClaraImage") img_obj = reader.read(filename) img = reader.get_data(img_obj) self.assertTupleEqual(img.shape, expected_shape) @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) - @skip_if_quick - def test_read_region(self, filename, patch_info, expected_img): - self.camelyon_data_download(filename) + @skipUnless(has_cui, "Requires CuClaraImage") + def test_read_region(self, file_info, patch_info, expected_img): + filename = self.camelyon_data_download(file_info) reader = WSIReader("CuClaraImage") img_obj = reader.read(filename) img = reader.get_data(img_obj, **patch_info) @@ -64,24 +93,26 @@ def test_read_region(self, filename, patch_info, expected_img): self.assertIsNone(assert_array_equal(img, expected_img)) @parameterized.expand([TEST_CASE_3, TEST_CASE_4]) - @skip_if_quick - def test_read_patches(self, filename, patch_info, expected_img): - self.camelyon_data_download(filename) + @skipUnless(has_cui, "Requires CuClaraImage") + def test_read_patches(self, file_info, patch_info, expected_img): + filename = 
self.camelyon_data_download(file_info) reader = WSIReader("CuClaraImage") img_obj = reader.read(filename) img = reader.get_data(img_obj, **patch_info) self.assertTupleEqual(img.shape, expected_img.shape) self.assertIsNone(assert_array_equal(img, expected_img)) - def camelyon_data_download(self, filename): + def camelyon_data_download(self, file_info): + from time import perf_counter + + filename = file_info["name"] if not os.path.exists(filename): print(f"Test image [{filename}] does not exist. Downloading...") - path = "gigadb/pub/10.5524/100001_101000/100439/CAMELYON16/testing/images/" - ftp = ftplib.FTP("parrot.genomics.cn") - ftp.login("anonymous", "") - ftp.cwd(path) - ftp.retrbinary("RETR " + filename, open(filename, "wb").write) - ftp.quit() + t0 = perf_counter() + request.urlretrieve(file_info["url"], filename) + t1 = perf_counter() + print(f"Elapsed time: {t1 - t0}s") + return filename if __name__ == "__main__": diff --git a/tests/test_openslide_reader.py b/tests/test_openslide_reader.py index 10b5945272..46a2cdf506 100644 --- a/tests/test_openslide_reader.py +++ b/tests/test_openslide_reader.py @@ -1,63 +1,91 @@ -import ftplib import os import unittest +from typing import TypedDict +from unittest import skipUnless +from urllib import request import numpy as np from numpy.testing import assert_array_equal from parameterized import parameterized from monai.data.image_reader import WSIReader -from tests.utils import skip_if_quick +from monai.utils import optional_import -filename = "test_065.tif" +_, has_osl = optional_import("openslide") -TEST_CASE_0 = [filename, (3, 53760, 77824)] + +class SampleImage(TypedDict): + name: str + url: str + height: int + width: int + + +FILE_INFO: SampleImage = { + "name": "CMU-1.tiff", + "url": "http://openslide.cs.cmu.edu/download/openslide-testdata/Generic-TIFF/CMU-1.tiff", + "height": 32914, + "width": 46000, +} + +TEST_CASE_0 = [FILE_INFO, (3, FILE_INFO["height"], FILE_INFO["width"])] TEST_CASE_1 = [ - filename, - 
{"location": (53760 // 2, 77824 // 2), "size": (2, 1), "level": 4}, - np.array([[[218], [237]], [[211], [230]], [[219], [237]]]), + FILE_INFO, + {"location": (FILE_INFO["height"] // 2, FILE_INFO["width"] // 2), "size": (2, 1), "level": 4}, + np.array([[[246], [246]], [[246], [246]], [[244], [244]]]), ] TEST_CASE_2 = [ - filename, - {"location": (53760 // 2, 77824 // 2), "size": (2, 1), "level": 2}, - np.array([[[229], [226]], [[218], [221]], [[232], [228]]]), + FILE_INFO, + {"location": (FILE_INFO["height"] // 2, FILE_INFO["width"] // 2), "size": (2, 1), "level": 2}, + np.array([[[246], [246]], [[246], [246]], [[246], [246]]]), ] TEST_CASE_3 = [ - filename, - {"location": (53760 // 2, 77824 // 2), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 2}, + FILE_INFO, + { + "location": (FILE_INFO["height"] // 2, FILE_INFO["width"] // 2), + "size": (8, 8), + "level": 2, + "grid_shape": (2, 1), + "patch_size": 2, + }, np.array( [ - [[[227, 228], [227, 228]], [[226, 228], [226, 228]], [[231, 228], [231, 230]]], - [[[224, 224], [224, 226]], [[227, 228], [227, 227]], [[232, 231], [232, 231]]], + [[[246, 246], [246, 246]], [[246, 246], [246, 246]], [[246, 246], [246, 246]]], + [[[246, 246], [246, 246]], [[246, 246], [246, 246]], [[246, 246], [246, 246]]], ] ), ] TEST_CASE_4 = [ - filename, - {"location": (53760 // 2, 77824 // 2), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 1}, - np.array([[[[228]], [[228]], [[230]]], [[[226]], [[227]], [[231]]]]), + FILE_INFO, + { + "location": (FILE_INFO["height"] // 2, FILE_INFO["width"] // 2), + "size": (8, 8), + "level": 2, + "grid_shape": (2, 1), + "patch_size": 1, + }, + np.array([[[[246]], [[246]], [[246]]], [[[246]], [[246]], [[246]]]]), ] class TestOpenSlideReader(unittest.TestCase): @parameterized.expand([TEST_CASE_0]) - @skip_if_quick - def test_read_whole_image(self, filename, expected_shape): - self.camelyon_data_download(filename) + @skipUnless(has_osl, "Requires OpenSlide") + def 
test_read_whole_image(self, file_info, expected_shape): + filename = self.camelyon_data_download(file_info) reader = WSIReader("OpenSlide") img_obj = reader.read(filename) img = reader.get_data(img_obj) - self.assertTupleEqual(img.shape, expected_shape) @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) - @skip_if_quick - def test_read_region(self, filename, patch_info, expected_img): - self.camelyon_data_download(filename) + @skipUnless(has_osl, "Requires OpenSlide") + def test_read_region(self, file_info, patch_info, expected_img): + filename = self.camelyon_data_download(file_info) reader = WSIReader("OpenSlide") img_obj = reader.read(filename) img = reader.get_data(img_obj, **patch_info) @@ -65,24 +93,26 @@ def test_read_region(self, filename, patch_info, expected_img): self.assertIsNone(assert_array_equal(img, expected_img)) @parameterized.expand([TEST_CASE_3, TEST_CASE_4]) - @skip_if_quick - def test_read_patches(self, filename, patch_info, expected_img): - self.camelyon_data_download(filename) + @skipUnless(has_osl, "Requires OpenSlide") + def test_read_patches(self, file_info, patch_info, expected_img): + filename = self.camelyon_data_download(file_info) reader = WSIReader("OpenSlide") img_obj = reader.read(filename) img = reader.get_data(img_obj, **patch_info) self.assertTupleEqual(img.shape, expected_img.shape) self.assertIsNone(assert_array_equal(img, expected_img)) - def camelyon_data_download(self, filename): + def camelyon_data_download(self, file_info): + from time import perf_counter + + filename = file_info["name"] if not os.path.exists(filename): print(f"Test image [{filename}] does not exist. 
Downloading...") - path = "gigadb/pub/10.5524/100001_101000/100439/CAMELYON16/testing/images/" - ftp = ftplib.FTP("parrot.genomics.cn") - ftp.login("anonymous", "") - ftp.cwd(path) - ftp.retrbinary("RETR " + filename, open(filename, "wb").write) - ftp.quit() + t0 = perf_counter() + request.urlretrieve(file_info["url"], filename) + t1 = perf_counter() + print(f"Elapsed time: {t1 - t0}s") + return filename if __name__ == "__main__": From 0a996581865db3c4f41b1b0da1937e51c6c41f35 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Fri, 26 Feb 2021 10:58:09 -0500 Subject: [PATCH 28/35] Update dependencies for OpenSlide Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- docs/requirements.txt | 1 + requirements-dev.txt | 1 + setup.cfg | 3 +++ 3 files changed, 5 insertions(+) diff --git a/docs/requirements.txt b/docs/requirements.txt index d046bc53cf..6dd606ad91 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -4,6 +4,7 @@ pytorch-ignite==0.4.2 numpy>=1.17 itk>=5.0 nibabel +openslide parameterized scikit-image>=0.14.2 tensorboard diff --git a/requirements-dev.txt b/requirements-dev.txt index 2a43e63d73..3eeab474b6 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -30,3 +30,4 @@ Sphinx==3.3.0 recommonmark==0.6.0 sphinx-autodoc-typehints==1.11.1 sphinx-rtd-theme==0.5.0 +openslide-python==1.1.2 diff --git a/setup.cfg b/setup.cfg index ea61eadd92..4b6b3f84f2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -32,6 +32,7 @@ all = torchvision itk>=5.0 tqdm>=4.47.0 + openslide==1.1.2 nibabel = nibabel skimage = @@ -54,6 +55,8 @@ lmdb = lmdb psutil = psutil +openslide = + openslide==1.1.2 [flake8] select = B,C,E,F,N,P,T4,W,B9 From 563a4fac3bb1983527810e60258603f9ace4cef5 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Mon, 1 Mar 2021 17:16:45 -0500 Subject: [PATCH 29/35] Update unittests for OpenSlide and CuImage Signed-off-by: Behrooz 
<3968947+behxyz@users.noreply.github.com> --- tests/test_cuimage_reader.py | 70 +++++++++++++--------------------- tests/test_openslide_reader.py | 68 +++++++++++++-------------------- 2 files changed, 53 insertions(+), 85 deletions(-) diff --git a/tests/test_cuimage_reader.py b/tests/test_cuimage_reader.py index c92c10be85..383c5bf17b 100644 --- a/tests/test_cuimage_reader.py +++ b/tests/test_cuimage_reader.py @@ -1,6 +1,5 @@ import os import unittest -from typing import TypedDict from unittest import skipUnless from urllib import request @@ -14,38 +13,28 @@ _, has_cui = optional_import("cuimage") -class SampleImage(TypedDict): - name: str - url: str - height: int - width: int +FILE_URL = "http://openslide.cs.cmu.edu/download/openslide-testdata/Generic-TIFF/CMU-1.tiff" +HEIGHT = 32914 +WIDTH = 46000 - -FILE_INFO: SampleImage = { - "name": "CMU-1.tiff", - "url": "http://openslide.cs.cmu.edu/download/openslide-testdata/Generic-TIFF/CMU-1.tiff", - "height": 32914, - "width": 46000, -} - -TEST_CASE_0 = [FILE_INFO, (3, FILE_INFO["height"], FILE_INFO["width"])] +TEST_CASE_0 = [FILE_URL, (3, HEIGHT, WIDTH)] TEST_CASE_1 = [ - FILE_INFO, - {"location": (FILE_INFO["height"] // 2, FILE_INFO["width"] // 2), "size": (2, 1), "level": 4}, - np.array([[[246], [246]], [[246], [246]], [[244], [244]]]), + FILE_URL, + {"location": (HEIGHT // 2, WIDTH // 2), "size": (2, 1), "level": 0}, + np.array([[[246], [246]], [[246], [246]], [[246], [246]]]), ] TEST_CASE_2 = [ - FILE_INFO, - {"location": (FILE_INFO["height"] // 2, FILE_INFO["width"] // 2), "size": (2, 1), "level": 2}, - np.array([[[246], [246]], [[246], [246]], [[246], [246]]]), + FILE_URL, + {"location": (0, 0), "size": (2, 1), "level": 2}, + np.array([[[239], [239]], [[239], [239]], [[239], [239]]]), ] TEST_CASE_3 = [ - FILE_INFO, + FILE_URL, { - "location": (FILE_INFO["height"] // 2, FILE_INFO["width"] // 2), + "location": (0, 0), "size": (8, 8), "level": 2, "grid_shape": (2, 1), @@ -53,30 +42,30 @@ class 
SampleImage(TypedDict): }, np.array( [ - [[[246, 246], [246, 246]], [[246, 246], [246, 246]], [[246, 246], [246, 246]]], - [[[246, 246], [246, 246]], [[246, 246], [246, 246]], [[246, 246], [246, 246]]], + [[[239, 239], [239, 239]], [[239, 239], [239, 239]], [[239, 239], [239, 239]]], + [[[242, 242], [242, 243]], [[242, 242], [242, 243]], [[242, 242], [242, 243]]], ] ), ] TEST_CASE_4 = [ - FILE_INFO, + FILE_URL, { - "location": (FILE_INFO["height"] // 2, FILE_INFO["width"] // 2), + "location": (0, 0), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 1, }, - np.array([[[[246]], [[246]], [[246]]], [[[246]], [[246]], [[246]]]]), + np.array([[[[239]], [[239]], [[239]]], [[[243]], [[243]], [[243]]]]), ] -class TestCuImageReader(unittest.TestCase): +class TestCuClaraImageReader(unittest.TestCase): @parameterized.expand([TEST_CASE_0]) @skipUnless(has_cui, "Requires CuClaraImage") - def test_read_whole_image(self, file_info, expected_shape): - filename = self.camelyon_data_download(file_info) + def test_read_whole_image(self, file_url, expected_shape): + filename = self.camelyon_data_download(file_url) reader = WSIReader("CuClaraImage") img_obj = reader.read(filename) img = reader.get_data(img_obj) @@ -84,8 +73,8 @@ def test_read_whole_image(self, file_info, expected_shape): @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) @skipUnless(has_cui, "Requires CuClaraImage") - def test_read_region(self, file_info, patch_info, expected_img): - filename = self.camelyon_data_download(file_info) + def test_read_region(self, file_url, patch_info, expected_img): + filename = self.camelyon_data_download(file_url) reader = WSIReader("CuClaraImage") img_obj = reader.read(filename) img = reader.get_data(img_obj, **patch_info) @@ -94,24 +83,19 @@ def test_read_region(self, file_info, patch_info, expected_img): @parameterized.expand([TEST_CASE_3, TEST_CASE_4]) @skipUnless(has_cui, "Requires CuClaraImage") - def test_read_patches(self, file_info, patch_info, expected_img): - 
filename = self.camelyon_data_download(file_info) + def test_read_patches(self, file_url, patch_info, expected_img): + filename = self.camelyon_data_download(file_url) reader = WSIReader("CuClaraImage") img_obj = reader.read(filename) img = reader.get_data(img_obj, **patch_info) self.assertTupleEqual(img.shape, expected_img.shape) self.assertIsNone(assert_array_equal(img, expected_img)) - def camelyon_data_download(self, file_info): - from time import perf_counter - - filename = file_info["name"] + def camelyon_data_download(self, file_url): + filename = os.path.basename(file_url) if not os.path.exists(filename): print(f"Test image [{filename}] does not exist. Downloading...") - t0 = perf_counter() - request.urlretrieve(file_info["url"], filename) - t1 = perf_counter() - print(f"Elapsed time: {t1 - t0}s") + request.urlretrieve(file_url, filename) return filename diff --git a/tests/test_openslide_reader.py b/tests/test_openslide_reader.py index 46a2cdf506..ede3fb4eb2 100644 --- a/tests/test_openslide_reader.py +++ b/tests/test_openslide_reader.py @@ -1,6 +1,5 @@ import os import unittest -from typing import TypedDict from unittest import skipUnless from urllib import request @@ -14,38 +13,28 @@ _, has_osl = optional_import("openslide") -class SampleImage(TypedDict): - name: str - url: str - height: int - width: int +FILE_URL = "http://openslide.cs.cmu.edu/download/openslide-testdata/Generic-TIFF/CMU-1.tiff" +HEIGHT = 32914 +WIDTH = 46000 - -FILE_INFO: SampleImage = { - "name": "CMU-1.tiff", - "url": "http://openslide.cs.cmu.edu/download/openslide-testdata/Generic-TIFF/CMU-1.tiff", - "height": 32914, - "width": 46000, -} - -TEST_CASE_0 = [FILE_INFO, (3, FILE_INFO["height"], FILE_INFO["width"])] +TEST_CASE_0 = [FILE_URL, (3, HEIGHT, WIDTH)] TEST_CASE_1 = [ - FILE_INFO, - {"location": (FILE_INFO["height"] // 2, FILE_INFO["width"] // 2), "size": (2, 1), "level": 4}, - np.array([[[246], [246]], [[246], [246]], [[244], [244]]]), + FILE_URL, + {"location": (HEIGHT // 2, 
WIDTH // 2), "size": (2, 1), "level": 0}, + np.array([[[246], [246]], [[246], [246]], [[246], [246]]]), ] TEST_CASE_2 = [ - FILE_INFO, - {"location": (FILE_INFO["height"] // 2, FILE_INFO["width"] // 2), "size": (2, 1), "level": 2}, - np.array([[[246], [246]], [[246], [246]], [[246], [246]]]), + FILE_URL, + {"location": (0, 0), "size": (2, 1), "level": 2}, + np.array([[[239], [239]], [[239], [239]], [[239], [239]]]), ] TEST_CASE_3 = [ - FILE_INFO, + FILE_URL, { - "location": (FILE_INFO["height"] // 2, FILE_INFO["width"] // 2), + "location": (0, 0), "size": (8, 8), "level": 2, "grid_shape": (2, 1), @@ -53,30 +42,30 @@ class SampleImage(TypedDict): }, np.array( [ - [[[246, 246], [246, 246]], [[246, 246], [246, 246]], [[246, 246], [246, 246]]], - [[[246, 246], [246, 246]], [[246, 246], [246, 246]], [[246, 246], [246, 246]]], + [[[239, 239], [239, 239]], [[239, 239], [239, 239]], [[239, 239], [239, 239]]], + [[[242, 242], [242, 243]], [[242, 242], [242, 243]], [[242, 242], [242, 243]]], ] ), ] TEST_CASE_4 = [ - FILE_INFO, + FILE_URL, { - "location": (FILE_INFO["height"] // 2, FILE_INFO["width"] // 2), + "location": (0, 0), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 1, }, - np.array([[[[246]], [[246]], [[246]]], [[[246]], [[246]], [[246]]]]), + np.array([[[[239]], [[239]], [[239]]], [[[243]], [[243]], [[243]]]]), ] class TestOpenSlideReader(unittest.TestCase): @parameterized.expand([TEST_CASE_0]) @skipUnless(has_osl, "Requires OpenSlide") - def test_read_whole_image(self, file_info, expected_shape): - filename = self.camelyon_data_download(file_info) + def test_read_whole_image(self, file_url, expected_shape): + filename = self.camelyon_data_download(file_url) reader = WSIReader("OpenSlide") img_obj = reader.read(filename) img = reader.get_data(img_obj) @@ -84,8 +73,8 @@ def test_read_whole_image(self, file_info, expected_shape): @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) @skipUnless(has_osl, "Requires OpenSlide") - def test_read_region(self, 
file_info, patch_info, expected_img): - filename = self.camelyon_data_download(file_info) + def test_read_region(self, file_url, patch_info, expected_img): + filename = self.camelyon_data_download(file_url) reader = WSIReader("OpenSlide") img_obj = reader.read(filename) img = reader.get_data(img_obj, **patch_info) @@ -94,24 +83,19 @@ def test_read_region(self, file_info, patch_info, expected_img): @parameterized.expand([TEST_CASE_3, TEST_CASE_4]) @skipUnless(has_osl, "Requires OpenSlide") - def test_read_patches(self, file_info, patch_info, expected_img): - filename = self.camelyon_data_download(file_info) + def test_read_patches(self, file_url, patch_info, expected_img): + filename = self.camelyon_data_download(file_url) reader = WSIReader("OpenSlide") img_obj = reader.read(filename) img = reader.get_data(img_obj, **patch_info) self.assertTupleEqual(img.shape, expected_img.shape) self.assertIsNone(assert_array_equal(img, expected_img)) - def camelyon_data_download(self, file_info): - from time import perf_counter - - filename = file_info["name"] + def camelyon_data_download(self, file_url): + filename = os.path.basename(file_url) if not os.path.exists(filename): print(f"Test image [{filename}] does not exist. 
Downloading...") - t0 = perf_counter() - request.urlretrieve(file_info["url"], filename) - t1 = perf_counter() - print(f"Elapsed time: {t1 - t0}s") + request.urlretrieve(file_url, filename) return filename From 3ac12c32087fa4608fbe6703fb0cc82ca2c5bd78 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Mon, 1 Mar 2021 17:35:46 -0500 Subject: [PATCH 30/35] Fix openslide dependency Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- setup.cfg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index 4b6b3f84f2..f18b4610fd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -32,7 +32,7 @@ all = torchvision itk>=5.0 tqdm>=4.47.0 - openslide==1.1.2 + openslide-python==1.1.2 nibabel = nibabel skimage = @@ -56,7 +56,7 @@ lmdb = psutil = psutil openslide = - openslide==1.1.2 + openslide-python==1.1.2 [flake8] select = B,C,E,F,N,P,T4,W,B9 From 15c147dfc6d69f55af728190a2f47af6d837c849 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Mon, 1 Mar 2021 18:39:04 -0500 Subject: [PATCH 31/35] Fix doc dependencies Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- docs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 6dd606ad91..cd06166359 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -4,7 +4,7 @@ pytorch-ignite==0.4.2 numpy>=1.17 itk>=5.0 nibabel -openslide +openslide-python==1.1.2 parameterized scikit-image>=0.14.2 tensorboard From 8a279c3430241b1186d2615a19724ef5d03c6953 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Wed, 3 Mar 2021 10:42:46 -0500 Subject: [PATCH 32/35] Minor changes Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- docs/source/data.rst | 4 ++++ monai/data/__init__.py | 2 +- monai/data/image_reader.py | 21 +++++++++++---------- 3 files changed, 16 insertions(+), 11 
deletions(-) diff --git a/docs/source/data.rst b/docs/source/data.rst index 11609964c3..eed4b30ded 100644 --- a/docs/source/data.rst +++ b/docs/source/data.rst @@ -105,6 +105,10 @@ PILReader .. autoclass:: PILReader :members: +WSIReader +~~~~~~~~~ +.. autoclass:: WSIReader + :members: Nifti format handling --------------------- diff --git a/monai/data/__init__.py b/monai/data/__init__.py index 3dd0a980ef..54ee7908f4 100644 --- a/monai/data/__init__.py +++ b/monai/data/__init__.py @@ -24,7 +24,7 @@ from .decathlon_datalist import load_decathlon_datalist, load_decathlon_properties from .grid_dataset import GridPatchDataset, PatchDataset from .image_dataset import ImageDataset -from .image_reader import ImageReader, ITKReader, NibabelReader, NumpyReader, PILReader +from .image_reader import ImageReader, ITKReader, NibabelReader, NumpyReader, PILReader, WSIReader from .iterable_dataset import IterableDataset from .nifti_saver import NiftiSaver from .nifti_writer import write_nifti diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py index 9fb97572cf..2b5f5efbf7 100644 --- a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -621,7 +621,7 @@ class WSIReader(ImageReader): """ - def __init__(self, wsi_reader_name: str = "CuImage"): + def __init__(self, wsi_reader_name: str = "cuClaraImage"): super().__init__() self.wsi_reader_name = wsi_reader_name.lower() if self.wsi_reader_name == "openslide": @@ -631,7 +631,7 @@ def __init__(self, wsi_reader_name: str = "CuImage"): self.wsi_reader = cuimage.CuImage print("> CuImage is being used.") else: - raise ValueError('`wsi_reader_name` should be either "CuClaraImage" or "OpenSlide"') + raise ValueError('`wsi_reader_name` should be either "cuClaraImage" or "OpenSlide"') def verify_suffix(self, filename: Union[Sequence[str], str]) -> bool: """ @@ -677,13 +677,15 @@ def get_data( Extract regions as numpy array from WSI image and return them. 
Args: - img: a wsi_reader object loaded from a file, or list of CuImage objects + img_obj: a WSIReader object loaded from a file, or list of CuImage objects location: (x_min, y_min) tuple giving the top left pixel in the level 0 reference frame, - or list of tuples (default=(0, 0)) - size: (height, width) tuple giving the region size, or list of tuples (default=(wsi_width, wsi_height)) - This is the size of image at the given level (`level`) - level: the level number, or list of level numbers (default=0) - + or list of tuples (default=(0, 0)) + size: (height, width) tuple giving the region size, or list of tuples (default=(wsi_width, wsi_height)) + This is the size of image at the given level (`level`) + level: the level number, or list of level numbers (default=0) + dtype: the data type of output image + grid_shape: (row, columns) tuple defining the grid on which patches are extracted + patch_size: (height, width) the size of extracted patches at the given level """ if size is None: if location == (0, 0): @@ -691,8 +693,7 @@ def get_data( size = (img_obj.shape[0] // (2 ** level), img_obj.shape[1] // (2 ** level)) print(f"Reading the whole image at level={level} with shape={size}") else: - print("Size need to be provided!") - return + raise ValueError("Size need to be provided to extract the region!") region = self._extract_region(img_obj, location=location, size=size, level=level, dtype=dtype) if patch_size is None: patches = region From c8750f0cf43325398c78a78ac9e64f60415d0fbb Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Thu, 4 Mar 2021 10:45:08 -0500 Subject: [PATCH 33/35] Few variable name changes Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- monai/data/image_reader.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py index c4b53acff8..78a08ca242 100644 --- a/monai/data/image_reader.py +++
b/monai/data/image_reader.py @@ -20,6 +20,7 @@ from monai.config import DtypeLike, KeysCollection from monai.data.utils import correct_nifti_header_if_necessary from monai.utils import ensure_tuple, optional_import +from monai.transforms import EnsureChannelFirst from .utils import is_supported_format @@ -638,17 +639,17 @@ class WSIReader(ImageReader): """ - def __init__(self, wsi_reader_name: str = "cuClaraImage"): + def __init__(self, reader_lib: str = "cuClaraImage"): super().__init__() - self.wsi_reader_name = wsi_reader_name.lower() - if self.wsi_reader_name == "openslide": + self.reader_lib = reader_lib.lower() + if self.reader_lib == "openslide": self.wsi_reader = openslide.OpenSlide print("> OpenSlide is being used.") - elif self.wsi_reader_name == "cuclaraimage": + elif self.reader_lib == "cuclaraimage": self.wsi_reader = cuimage.CuImage print("> CuImage is being used.") else: - raise ValueError('`wsi_reader_name` should be either "cuClaraImage" or "OpenSlide"') + raise ValueError('`reader_lib` should be either "cuClaraImage" or "OpenSlide"') def verify_suffix(self, filename: Union[Sequence[str], str]) -> bool: """ @@ -674,7 +675,7 @@ def read(self, data: Union[Sequence[str], str, np.ndarray], **kwargs): filenames: Sequence[str] = ensure_tuple(data) for name in filenames: img = self.wsi_reader(name) - if self.wsi_reader_name == "openslide": + if self.reader_lib == "openslide": img.shape = (img.dimensions[1], img.dimensions[0], 3) img_.append(img) @@ -682,7 +683,7 @@ def read(self, data: Union[Sequence[str], str, np.ndarray], **kwargs): def get_data( self, - img_obj, + img, location: Tuple[int, int] = (0, 0), size: Optional[Tuple[int, int]] = None, level: int = 0, @@ -694,10 +695,10 @@ def get_data( Extract regions as numpy array from WSI image and return them. 
Args: - img_obj: a WSIReader object loaded from a file, or list of CuImage objects + img: a WSIReader image object loaded from a file, or list of CuImage objects location: (x_min, y_min) tuple giving the top left pixel in the level 0 reference frame, or list of tuples (default=(0, 0)) - size: (height, width) tuple giving the region size, or list of tuples (default=(wsi_width, wsi_height)) + size: (height, width) tuple giving the region size, or list of tuples (default to full image size) This is the size of image at the given level (`level`) level: the level number, or list of level numbers (default=0) dtype: the data type of output image @@ -707,11 +708,11 @@ def get_data( if size is None: if location == (0, 0): # the maximum size is set to WxH - size = (img_obj.shape[0] // (2 ** level), img_obj.shape[1] // (2 ** level)) + size = (img.shape[0] // (2 ** level), img.shape[1] // (2 ** level)) print(f"Reading the whole image at level={level} with shape={size}") else: raise ValueError("Size need to be provided to extract the region!") - region = self._extract_region(img_obj, location=location, size=size, level=level, dtype=dtype) + region = self._extract_region(img, location=location, size=size, level=level, dtype=dtype) if patch_size is None: patches = region else: @@ -732,7 +733,7 @@ def _extract_region( size = size[::-1] location = location[::-1] region = img_obj.read_region(location=location, size=size, level=level) - if self.wsi_reader_name == "openslide": + if self.reader_lib == "openslide": region = region.convert("RGB") # convert to numpy region = np.asarray(region, dtype=dtype) From a440caffa7170c88064025fc3c56f20de6df7ec6 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Thu, 4 Mar 2021 12:54:53 -0500 Subject: [PATCH 34/35] Add EnsureChannelFirst Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- monai/data/image_reader.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git 
a/monai/data/image_reader.py b/monai/data/image_reader.py index 78a08ca242..48af76bf4d 100644 --- a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -19,8 +19,8 @@ from monai.config import DtypeLike, KeysCollection from monai.data.utils import correct_nifti_header_if_necessary +from monai.transforms.utility.array import EnsureChannelFirst from monai.utils import ensure_tuple, optional_import -from monai.transforms import EnsureChannelFirst from .utils import is_supported_format @@ -712,13 +712,21 @@ def get_data( print(f"Reading the whole image at level={level} with shape={size}") else: raise ValueError("Size need to be provided to extract the region!") + region = self._extract_region(img, location=location, size=size, level=level, dtype=dtype) + + metadata: Dict = {} + metadata["spatial_shape"] = size + metadata["original_channel_dim"] = -1 + region = EnsureChannelFirst()(region, metadata) + if patch_size is None: patches = region else: patches = self._extract_patches( region, patch_size=(patch_size, patch_size), grid_shape=grid_shape, dtype=dtype ) + return patches def _extract_region( @@ -737,8 +745,7 @@ def _extract_region( region = region.convert("RGB") # convert to numpy region = np.asarray(region, dtype=dtype) - # cuCalaraImage/OpenSlide: (H x W x C) -> torch image: (C X H X W) - region = region.transpose((2, 0, 1)) + return region def _extract_patches( From 652f046ea3327d2291f542391f4145cf8e1462af Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+behxyz@users.noreply.github.com> Date: Thu, 4 Mar 2021 12:58:59 -0500 Subject: [PATCH 35/35] Add metadata to WSIReader Signed-off-by: Behrooz <3968947+behxyz@users.noreply.github.com> --- monai/data/image_reader.py | 2 +- tests/test_cuimage_reader.py | 6 +++--- tests/test_openslide_reader.py | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py index 48af76bf4d..76bf1817dc 100644 --- a/monai/data/image_reader.py +++ 
b/monai/data/image_reader.py @@ -727,7 +727,7 @@ def get_data( region, patch_size=(patch_size, patch_size), grid_shape=grid_shape, dtype=dtype ) - return patches + return patches, metadata def _extract_region( self, diff --git a/tests/test_cuimage_reader.py b/tests/test_cuimage_reader.py index 383c5bf17b..7cdf692a30 100644 --- a/tests/test_cuimage_reader.py +++ b/tests/test_cuimage_reader.py @@ -68,7 +68,7 @@ def test_read_whole_image(self, file_url, expected_shape): filename = self.camelyon_data_download(file_url) reader = WSIReader("CuClaraImage") img_obj = reader.read(filename) - img = reader.get_data(img_obj) + img = reader.get_data(img_obj)[0] self.assertTupleEqual(img.shape, expected_shape) @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) @@ -77,7 +77,7 @@ def test_read_region(self, file_url, patch_info, expected_img): filename = self.camelyon_data_download(file_url) reader = WSIReader("CuClaraImage") img_obj = reader.read(filename) - img = reader.get_data(img_obj, **patch_info) + img = reader.get_data(img_obj, **patch_info)[0] self.assertTupleEqual(img.shape, expected_img.shape) self.assertIsNone(assert_array_equal(img, expected_img)) @@ -87,7 +87,7 @@ def test_read_patches(self, file_url, patch_info, expected_img): filename = self.camelyon_data_download(file_url) reader = WSIReader("CuClaraImage") img_obj = reader.read(filename) - img = reader.get_data(img_obj, **patch_info) + img = reader.get_data(img_obj, **patch_info)[0] self.assertTupleEqual(img.shape, expected_img.shape) self.assertIsNone(assert_array_equal(img, expected_img)) diff --git a/tests/test_openslide_reader.py b/tests/test_openslide_reader.py index ede3fb4eb2..e1f9187937 100644 --- a/tests/test_openslide_reader.py +++ b/tests/test_openslide_reader.py @@ -68,7 +68,7 @@ def test_read_whole_image(self, file_url, expected_shape): filename = self.camelyon_data_download(file_url) reader = WSIReader("OpenSlide") img_obj = reader.read(filename) - img = reader.get_data(img_obj) + img = 
reader.get_data(img_obj)[0] self.assertTupleEqual(img.shape, expected_shape) @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) @@ -77,7 +77,7 @@ def test_read_region(self, file_url, patch_info, expected_img): filename = self.camelyon_data_download(file_url) reader = WSIReader("OpenSlide") img_obj = reader.read(filename) - img = reader.get_data(img_obj, **patch_info) + img = reader.get_data(img_obj, **patch_info)[0] self.assertTupleEqual(img.shape, expected_img.shape) self.assertIsNone(assert_array_equal(img, expected_img)) @@ -87,7 +87,7 @@ def test_read_patches(self, file_url, patch_info, expected_img): filename = self.camelyon_data_download(file_url) reader = WSIReader("OpenSlide") img_obj = reader.read(filename) - img = reader.get_data(img_obj, **patch_info) + img = reader.get_data(img_obj, **patch_info)[0] self.assertTupleEqual(img.shape, expected_img.shape) self.assertIsNone(assert_array_equal(img, expected_img))