From ccd22dd45b7d06afab6435cd74607a7964fb8808 Mon Sep 17 00:00:00 2001 From: YuanTingHsieh Date: Fri, 12 Feb 2021 13:33:28 -0800 Subject: [PATCH 1/3] Add deepgrow dataset Signed-off-by: YuanTingHsieh --- monai/apps/deepgrow/dataset.py | 271 +++++++++++++++++++++++++++++++++ tests/test_deepgrow_dataset.py | 55 +++++++ 2 files changed, 326 insertions(+) create mode 100644 monai/apps/deepgrow/dataset.py create mode 100644 tests/test_deepgrow_dataset.py diff --git a/monai/apps/deepgrow/dataset.py b/monai/apps/deepgrow/dataset.py new file mode 100644 index 0000000000..2f684a9707 --- /dev/null +++ b/monai/apps/deepgrow/dataset.py @@ -0,0 +1,271 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +from typing import Dict, List + +import numpy as np + +from monai.transforms import AsChannelFirstd, Compose, LoadImaged, Orientationd, Spacingd +from monai.utils import GridSampleMode + + +def create_dataset( + datalist, + output_dir: str, + dimension, + pixdim, + image_key: str = "image", + label_key: str = "label", + base_dir: str = None, + limit: int = 0, + relative_path: bool = False, + transforms=None, +) -> List[Dict]: + """ + Utility to pre-process and create dataset list for Deepgrow training over on existing one. + The input data list is normally a list of images and labels (3D volume) that needs pre-processing + for Deepgrow training pipeline. 
+ + Args: + datalist: A generic dataset with a length property which normally contains a list of data dictionary. + For example, typical input data can be a list of dictionaries:: + + [{'image': 'img1.nii', 'label': 'label1.nii'}] + + output_dir: target directory to store the training data for Deepgrow Training + pixdim: output voxel spacing. + dimension: dimension for Deepgrow training. It can be 2 or 3. + image_key: image key in input datalist. Defaults to 'image'. + label_key: label key in input datalist. Defaults to 'label'. + base_dir: base directory in case relative path is used for the keys in datalist. Defaults to None. + limit: limit number of inputs for pre-processing. Defaults to 0 (no limit). + relative_path: output keys values should be based on relative path. Defaults to False. + transforms: explicit transforms to execute operations on input data. + + Raises: + ValueError: When ``dimension`` is not one of [2, 3] + ValueError: When ``datalist`` is Empty + + Returns: + A new datalist that contains path to the images/labels after pre-processing. 
+ + Example:: + + datalist = create_dataset( + datalist=[{'image': 'img1.nii', 'label': 'label1.nii'}], + base_dir=None, + output_dir=output_2d, + dimension=2, + image_key='image', + label_key='label', + pixdim=(1.0, 1.0), + limit=0, + relative_path=True + ) + + print(datalist[0]["image"], datalist[0]["label"]) + """ + + if dimension not in [2, 3]: + raise ValueError("Dimension can be only 2 or 3 as Deepgrow supports only 2D/3D Training") + + if not len(datalist): + raise ValueError("Input datalist is empty") + + transforms = _default_transforms(image_key, label_key, pixdim) if transforms is None else transforms + new_datalist = [] + for idx in range(len(datalist)): + if limit and idx >= limit: + break + + image = datalist[idx][image_key] + label = datalist[idx].get(label_key, None) + if base_dir: + image = os.path.join(base_dir, image) + label = os.path.join(base_dir, label) if label else None + + image = os.path.abspath(image) + label = os.path.abspath(label) if label else None + + logging.info("Image: {}; Label: {}".format(image, label if label else None)) + data = transforms({image_key: image, label_key: label}) + if dimension == 2: + data = _save_data_2d( + vol_idx=idx, + vol_image=data[image_key], + vol_label=data[label_key], + dataset_dir=output_dir, + relative_path=relative_path, + ) + else: + data = _save_data_3d( + vol_idx=idx, + vol_image=data[image_key], + vol_label=data[label_key], + dataset_dir=output_dir, + relative_path=relative_path, + ) + new_datalist.extend(data) + return new_datalist + + +def _default_transforms(image_key, label_key, pixdim): + keys = [image_key] if label_key is None else [image_key, label_key] + mode = [GridSampleMode.BILINEAR, GridSampleMode.NEAREST] if len(keys) == 2 else [GridSampleMode.BILINEAR] + return Compose( + [ + LoadImaged(keys=keys), + AsChannelFirstd(keys=keys), + Spacingd(keys=keys, pixdim=pixdim, mode=mode), + Orientationd(keys=keys, axcodes="RAS"), + ] + ) + + +def _save_data_2d(vol_idx, vol_image, vol_label, 
dataset_dir, relative_path): + data_list = [] + + if len(vol_image.shape) == 4: + logging.info( + "4D-Image, pick only first series; Image: {}; Label: {}".format( + vol_image.shape, vol_label.shape if vol_label else None + ) + ) + vol_image = vol_image[0] + vol_image = np.moveaxis(vol_image, -1, 0) + + image_count = 0 + label_count = 0 + unique_labels_count = 0 + for sid in range(vol_image.shape[0]): + image = vol_image[sid, ...] + label = vol_label[sid, ...] if vol_label is not None else None + + if vol_label is not None and np.sum(label) == 0: + continue + + image_file_prefix = "vol_idx_{:0>4d}_slice_{:0>3d}".format(vol_idx, sid) + image_file = os.path.join(dataset_dir, "images", image_file_prefix) + image_file += ".npy" + + os.makedirs(os.path.join(dataset_dir, "images"), exist_ok=True) + np.save(image_file, image) + image_count += 1 + + # Test Data + if vol_label is None: + data_list.append( + { + "image": image_file.replace(dataset_dir + "/", "") if relative_path else image_file, + } + ) + continue + + # For all Labels + unique_labels = np.unique(label.flatten()) + unique_labels = unique_labels[unique_labels != 0] + unique_labels_count = max(unique_labels_count, len(unique_labels)) + + for idx in unique_labels: + label_file_prefix = "{}_region_{:0>2d}".format(image_file_prefix, int(idx)) + label_file = os.path.join(dataset_dir, "labels", label_file_prefix) + label_file += ".npy" + + os.makedirs(os.path.join(dataset_dir, "labels"), exist_ok=True) + curr_label = (label == idx).astype(np.float32) + np.save(label_file, curr_label) + + label_count += 1 + data_list.append( + { + "image": image_file.replace(dataset_dir + "/", "") if relative_path else image_file, + "label": label_file.replace(dataset_dir + "/", "") if relative_path else label_file, + "region": int(idx), + } + ) + + logging.info( + "{} => Image Shape: {} => {}; Label Shape: {} => {}; Unique Labels: {}".format( + vol_idx, + vol_image.shape, + image_count, + vol_label.shape if vol_label is not None else 
None, + label_count, + unique_labels_count, + ) + ) + return data_list + + +def _save_data_3d(vol_idx, vol_image, vol_label, dataset_dir, relative_path): + data_list = [] + + if len(vol_image.shape) == 4: + logging.info("4D-Image, pick only first series; Image: {}; Label: {}".format(vol_image.shape, vol_label.shape)) + vol_image = vol_image[0] + vol_image = np.moveaxis(vol_image, -1, 0) + + image_count = 0 + label_count = 0 + unique_labels_count = 0 + + image_file_prefix = "vol_idx_{:0>4d}".format(vol_idx) + image_file = os.path.join(dataset_dir, "images", image_file_prefix) + image_file += ".npy" + + os.makedirs(os.path.join(dataset_dir, "images"), exist_ok=True) + np.save(image_file, vol_image) + image_count += 1 + + # Test Data + if vol_label is None: + data_list.append( + { + "image": image_file.replace(dataset_dir + "/", "") if relative_path else image_file, + } + ) + else: + # For all Labels + unique_labels = np.unique(vol_label.flatten()) + unique_labels = unique_labels[unique_labels != 0] + unique_labels_count = max(unique_labels_count, len(unique_labels)) + + for idx in unique_labels: + label_file_prefix = "{}_region_{:0>2d}".format(image_file_prefix, int(idx)) + label_file = os.path.join(dataset_dir, "labels", label_file_prefix) + label_file += ".npy" + + curr_label = (vol_label == idx).astype(np.float32) + os.makedirs(os.path.join(dataset_dir, "labels"), exist_ok=True) + np.save(label_file, curr_label) + + label_count += 1 + data_list.append( + { + "image": image_file.replace(dataset_dir + "/", "") if relative_path else image_file, + "label": label_file.replace(dataset_dir + "/", "") if relative_path else label_file, + "region": int(idx), + } + ) + + logging.info( + "{} => Image Shape: {} => {}; Label Shape: {} => {}; Unique Labels: {}".format( + vol_idx, + vol_image.shape, + image_count, + vol_label.shape if vol_label is not None else None, + label_count, + unique_labels_count, + ) + ) + return data_list diff --git a/tests/test_deepgrow_dataset.py 
b/tests/test_deepgrow_dataset.py new file mode 100644 index 0000000000..f5e9bf1dda --- /dev/null +++ b/tests/test_deepgrow_dataset.py @@ -0,0 +1,55 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import tempfile +import unittest + +import nibabel as nib +import numpy as np + +from monai.apps.deepgrow.dataset import create_dataset + + +class TestCreateDataset(unittest.TestCase): + def _create_data(self, tempdir): + affine = np.eye(4) + image = np.random.randint(0, 2, size=(128, 128, 40)) + image_file = os.path.join(tempdir, "image1.nii.gz") + nib.save(nib.Nifti1Image(image, affine), image_file) + + label = np.zeros((128, 128, 40)) + label[0][1][0] = 1 + label[0][1][1] = 1 + label[0][0][2] = 1 + label[0][1][2] = 1 + label_file = os.path.join(tempdir, "label1.nii.gz") + nib.save(nib.Nifti1Image(label, affine), label_file) + + return [{"image": image_file, "label": label_file}] + + def test_create_dataset_2d(self): + with tempfile.TemporaryDirectory() as tempdir: + datalist = self._create_data(tempdir) + output_dir = os.path.join(tempdir, "2d") + deepgrow_datalist = create_dataset(datalist=datalist, output_dir=output_dir, dimension=2, pixdim=(1, 1)) + assert len(deepgrow_datalist) == 3 and deepgrow_datalist[0]["region"] == 1 + + def test_create_dataset_3d(self): + with tempfile.TemporaryDirectory() as tempdir: + datalist = self._create_data(tempdir) + output_dir = os.path.join(tempdir, "3d") + deepgrow_datalist = 
create_dataset(datalist=datalist, output_dir=output_dir, dimension=3, pixdim=(1, 1, 1)) + assert len(deepgrow_datalist) == 1 and deepgrow_datalist[0]["region"] == 1 + + +if __name__ == "__main__": + unittest.main() From 21f0b7ba1c1e5b142793c25d5c075eca4d820f4c Mon Sep 17 00:00:00 2001 From: YuanTingHsieh Date: Fri, 12 Feb 2021 15:20:30 -0800 Subject: [PATCH 2/3] Fix CI/CD issue Signed-off-by: YuanTingHsieh --- monai/apps/deepgrow/dataset.py | 2 +- tests/min_tests.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/monai/apps/deepgrow/dataset.py b/monai/apps/deepgrow/dataset.py index 2f684a9707..4ba760afd5 100644 --- a/monai/apps/deepgrow/dataset.py +++ b/monai/apps/deepgrow/dataset.py @@ -26,7 +26,7 @@ def create_dataset( pixdim, image_key: str = "image", label_key: str = "label", - base_dir: str = None, + base_dir=None, limit: int = 0, relative_path: bool = False, transforms=None, diff --git a/tests/min_tests.py b/tests/min_tests.py index 0fd6985067..e2c7bc529a 100644 --- a/tests/min_tests.py +++ b/tests/min_tests.py @@ -104,6 +104,7 @@ def run_testsuit(): "test_handler_metrics_saver_dist", "test_evenly_divisible_all_gather_dist", "test_handler_classification_saver_dist", + "test_deepgrow_dataset", ] assert sorted(exclude_cases) == sorted(set(exclude_cases)), f"Duplicated items in {exclude_cases}" From 1ab63da0e326acaeabdc723af423ddb2b1e6d780 Mon Sep 17 00:00:00 2001 From: YuanTingHsieh Date: Fri, 19 Feb 2021 11:46:55 -0800 Subject: [PATCH 3/3] Fix issues based on review Signed-off-by: YuanTingHsieh --- monai/apps/deepgrow/dataset.py | 28 +++++++++++++++++++--------- tests/test_deepgrow_dataset.py | 6 ++++-- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/monai/apps/deepgrow/dataset.py b/monai/apps/deepgrow/dataset.py index 4ba760afd5..45cfbde6ea 100644 --- a/monai/apps/deepgrow/dataset.py +++ b/monai/apps/deepgrow/dataset.py @@ -37,10 +37,10 @@ def create_dataset( for Deepgrow training pipeline. 
Args: - datalist: A generic dataset with a length property which normally contains a list of data dictionary. + datalist: A list of data dictionary. Each entry should at least contain 'image_key': . For example, typical input data can be a list of dictionaries:: - [{'image': 'img1.nii', 'label': 'label1.nii'}] + [{'image': , 'label':