-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Add deepgrow dataset #1581
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
wyli
merged 4 commits into
Project-MONAI:master
from
YuanTingHsieh:add_deepgrow_dataset
Feb 20, 2021
Merged
Add deepgrow dataset #1581
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,281 @@ | ||
| # Copyright 2020 - 2021 MONAI Consortium | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| import logging | ||
| import os | ||
| from typing import Dict, List | ||
|
|
||
| import numpy as np | ||
|
|
||
| from monai.transforms import AsChannelFirstd, Compose, LoadImaged, Orientationd, Spacingd | ||
| from monai.utils import GridSampleMode | ||
|
|
||
|
|
||
| def create_dataset( | ||
| datalist, | ||
| output_dir: str, | ||
| dimension, | ||
| pixdim, | ||
| image_key: str = "image", | ||
| label_key: str = "label", | ||
| base_dir=None, | ||
| limit: int = 0, | ||
| relative_path: bool = False, | ||
| transforms=None, | ||
| ) -> List[Dict]: | ||
| """ | ||
| Utility to pre-process and create dataset list for Deepgrow training over on existing one. | ||
| The input data list is normally a list of images and labels (3D volume) that needs pre-processing | ||
| for Deepgrow training pipeline. | ||
|
|
||
| Args: | ||
| datalist: A list of data dictionary. Each entry should at least contain 'image_key': <image filename>. | ||
| For example, typical input data can be a list of dictionaries:: | ||
|
|
||
| [{'image': <image filename>, 'label': <label filename>}] | ||
|
|
||
| output_dir: target directory to store the training data for Deepgrow Training | ||
| pixdim: output voxel spacing. | ||
| dimension: dimension for Deepgrow training. It can be 2 or 3. | ||
| image_key: image key in input datalist. Defaults to 'image'. | ||
| label_key: label key in input datalist. Defaults to 'label'. | ||
| base_dir: base directory in case related path is used for the keys in datalist. Defaults to None. | ||
| limit: limit number of inputs for pre-processing. Defaults to 0 (no limit). | ||
| relative_path: output keys values should be based on relative path. Defaults to False. | ||
| transforms: explicit transforms to execute operations on input data. | ||
|
|
||
| Raises: | ||
| ValueError: When ``dimension`` is not one of [2, 3] | ||
| ValueError: When ``datalist`` is Empty | ||
|
|
||
| Returns: | ||
| A new datalist that contains path to the images/labels after pre-processing. | ||
|
|
||
| Example:: | ||
|
|
||
| datalist = create_dataset( | ||
| datalist=[{'image': 'img1.nii', 'label': 'label1.nii'}], | ||
| base_dir=None, | ||
| output_dir=output_2d, | ||
| dimension=2, | ||
| image_key='image', | ||
| label_key='label', | ||
| pixdim=(1.0, 1.0), | ||
| limit=0, | ||
| relative_path=True | ||
| ) | ||
|
|
||
| print(datalist[0]["image"], datalist[0]["label"]) | ||
| """ | ||
|
|
||
| if dimension not in [2, 3]: | ||
| raise ValueError("Dimension can be only 2 or 3 as Deepgrow supports only 2D/3D Training") | ||
|
|
||
| if not len(datalist): | ||
| raise ValueError("Input datalist is empty") | ||
|
|
||
| transforms = _default_transforms(image_key, label_key, pixdim) if transforms is None else transforms | ||
| new_datalist = [] | ||
| for idx in range(len(datalist)): | ||
| if limit and idx >= limit: | ||
| break | ||
|
|
||
| image = datalist[idx][image_key] | ||
| label = datalist[idx].get(label_key, None) | ||
| if base_dir: | ||
| image = os.path.join(base_dir, image) | ||
| label = os.path.join(base_dir, label) if label else None | ||
|
|
||
| image = os.path.abspath(image) | ||
| label = os.path.abspath(label) if label else None | ||
|
|
||
| logging.info("Image: {}; Label: {}".format(image, label if label else None)) | ||
| data = transforms({image_key: image, label_key: label}) | ||
| if dimension == 2: | ||
| data = _save_data_2d( | ||
| vol_idx=idx, | ||
| vol_image=data[image_key], | ||
| vol_label=data[label_key], | ||
| dataset_dir=output_dir, | ||
| relative_path=relative_path, | ||
| ) | ||
| else: | ||
| data = _save_data_3d( | ||
| vol_idx=idx, | ||
| vol_image=data[image_key], | ||
| vol_label=data[label_key], | ||
| dataset_dir=output_dir, | ||
| relative_path=relative_path, | ||
| ) | ||
| new_datalist.extend(data) | ||
| return new_datalist | ||
|
|
||
|
|
||
| def _default_transforms(image_key, label_key, pixdim): | ||
| keys = [image_key] if label_key is None else [image_key, label_key] | ||
| mode = [GridSampleMode.BILINEAR, GridSampleMode.NEAREST] if len(keys) == 2 else [GridSampleMode.BILINEAR] | ||
| return Compose( | ||
| [ | ||
| LoadImaged(keys=keys), | ||
| AsChannelFirstd(keys=keys), | ||
| Spacingd(keys=keys, pixdim=pixdim, mode=mode), | ||
| Orientationd(keys=keys, axcodes="RAS"), | ||
| ] | ||
| ) | ||
|
|
||
|
|
||
| def _save_data_2d(vol_idx, vol_image, vol_label, dataset_dir, relative_path): | ||
| data_list = [] | ||
|
|
||
| if len(vol_image.shape) == 4: | ||
wyli marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| logging.info( | ||
| "4D-Image, pick only first series; Image: {}; Label: {}".format( | ||
| vol_image.shape, vol_label.shape if vol_label else None | ||
| ) | ||
| ) | ||
| vol_image = vol_image[0] | ||
| vol_image = np.moveaxis(vol_image, -1, 0) | ||
|
|
||
| image_count = 0 | ||
| label_count = 0 | ||
| unique_labels_count = 0 | ||
| for sid in range(vol_image.shape[0]): | ||
| image = vol_image[sid, ...] | ||
| label = vol_label[sid, ...] if vol_label is not None else None | ||
|
|
||
| if vol_label is not None and np.sum(label) == 0: | ||
| continue | ||
|
|
||
| image_file_prefix = "vol_idx_{:0>4d}_slice_{:0>3d}".format(vol_idx, sid) | ||
| image_file = os.path.join(dataset_dir, "images", image_file_prefix) | ||
| image_file += ".npy" | ||
|
|
||
| os.makedirs(os.path.join(dataset_dir, "images"), exist_ok=True) | ||
| np.save(image_file, image) | ||
| image_count += 1 | ||
|
|
||
| # Test Data | ||
| if vol_label is None: | ||
| data_list.append( | ||
| { | ||
| "image": image_file.replace(dataset_dir + os.pathsep, "") if relative_path else image_file, | ||
| } | ||
| ) | ||
| continue | ||
|
|
||
| # For all Labels | ||
| unique_labels = np.unique(label.flatten()) | ||
YuanTingHsieh marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| unique_labels = unique_labels[unique_labels != 0] | ||
| unique_labels_count = max(unique_labels_count, len(unique_labels)) | ||
|
|
||
| for idx in unique_labels: | ||
| label_file_prefix = "{}_region_{:0>2d}".format(image_file_prefix, int(idx)) | ||
| label_file = os.path.join(dataset_dir, "labels", label_file_prefix) | ||
| label_file += ".npy" | ||
|
|
||
| os.makedirs(os.path.join(dataset_dir, "labels"), exist_ok=True) | ||
| curr_label = (label == idx).astype(np.float32) | ||
| np.save(label_file, curr_label) | ||
|
|
||
| label_count += 1 | ||
| data_list.append( | ||
| { | ||
| "image": image_file.replace(dataset_dir + os.pathsep, "") if relative_path else image_file, | ||
| "label": label_file.replace(dataset_dir + os.pathsep, "") if relative_path else label_file, | ||
| "region": int(idx), | ||
| } | ||
| ) | ||
|
|
||
| if unique_labels_count >= 20: | ||
| logging.warning(f"Unique labels {unique_labels_count} exceeds 20. Please check if this is correct.") | ||
|
|
||
| logging.info( | ||
| "{} => Image Shape: {} => {}; Label Shape: {} => {}; Unique Labels: {}".format( | ||
| vol_idx, | ||
| vol_image.shape, | ||
| image_count, | ||
| vol_label.shape if vol_label is not None else None, | ||
| label_count, | ||
| unique_labels_count, | ||
| ) | ||
| ) | ||
| return data_list | ||
|
|
||
|
|
||
| def _save_data_3d(vol_idx, vol_image, vol_label, dataset_dir, relative_path): | ||
| data_list = [] | ||
|
|
||
| if len(vol_image.shape) == 4: | ||
| logging.info( | ||
| "4D-Image, pick only first series; Image: {}; Label: {}".format( | ||
| vol_image.shape, vol_label.shape if vol_label else None | ||
| ) | ||
| ) | ||
| vol_image = vol_image[0] | ||
| vol_image = np.moveaxis(vol_image, -1, 0) | ||
|
|
||
| image_count = 0 | ||
| label_count = 0 | ||
| unique_labels_count = 0 | ||
|
|
||
| image_file_prefix = "vol_idx_{:0>4d}".format(vol_idx) | ||
| image_file = os.path.join(dataset_dir, "images", image_file_prefix) | ||
| image_file += ".npy" | ||
|
|
||
| os.makedirs(os.path.join(dataset_dir, "images"), exist_ok=True) | ||
| np.save(image_file, vol_image) | ||
| image_count += 1 | ||
|
|
||
| # Test Data | ||
| if vol_label is None: | ||
| data_list.append( | ||
| { | ||
| "image": image_file.replace(dataset_dir + os.pathsep, "") if relative_path else image_file, | ||
| } | ||
| ) | ||
| else: | ||
| # For all Labels | ||
| unique_labels = np.unique(vol_label.flatten()) | ||
| unique_labels = unique_labels[unique_labels != 0] | ||
| unique_labels_count = max(unique_labels_count, len(unique_labels)) | ||
|
|
||
| for idx in unique_labels: | ||
| label_file_prefix = "{}_region_{:0>2d}".format(image_file_prefix, int(idx)) | ||
| label_file = os.path.join(dataset_dir, "labels", label_file_prefix) | ||
| label_file += ".npy" | ||
|
|
||
| curr_label = (vol_label == idx).astype(np.float32) | ||
| os.makedirs(os.path.join(dataset_dir, "labels"), exist_ok=True) | ||
| np.save(label_file, curr_label) | ||
|
|
||
| label_count += 1 | ||
| data_list.append( | ||
| { | ||
| "image": image_file.replace(dataset_dir + os.pathsep, "") if relative_path else image_file, | ||
| "label": label_file.replace(dataset_dir + os.pathsep, "") if relative_path else label_file, | ||
| "region": int(idx), | ||
| } | ||
| ) | ||
|
|
||
| if unique_labels_count >= 20: | ||
| logging.warning(f"Unique labels {unique_labels_count} exceeds 20. Please check if this is correct.") | ||
|
|
||
| logging.info( | ||
| "{} => Image Shape: {} => {}; Label Shape: {} => {}; Unique Labels: {}".format( | ||
| vol_idx, | ||
| vol_image.shape, | ||
| image_count, | ||
| vol_label.shape if vol_label is not None else None, | ||
| label_count, | ||
| unique_labels_count, | ||
| ) | ||
| ) | ||
| return data_list | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,57 @@ | ||
| # Copyright 2020 - 2021 MONAI Consortium | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| import os | ||
| import tempfile | ||
| import unittest | ||
|
|
||
| import nibabel as nib | ||
| import numpy as np | ||
|
|
||
| from monai.apps.deepgrow.dataset import create_dataset | ||
|
|
||
|
|
||
| class TestCreateDataset(unittest.TestCase): | ||
| def _create_data(self, tempdir): | ||
| affine = np.eye(4) | ||
| image = np.random.randint(0, 2, size=(128, 128, 40)) | ||
| image_file = os.path.join(tempdir, "image1.nii.gz") | ||
| nib.save(nib.Nifti1Image(image, affine), image_file) | ||
|
|
||
| label = np.zeros((128, 128, 40)) | ||
| label[0][1][0] = 1 | ||
| label[0][1][1] = 1 | ||
| label[0][0][2] = 1 | ||
| label[0][1][2] = 1 | ||
| label_file = os.path.join(tempdir, "label1.nii.gz") | ||
| nib.save(nib.Nifti1Image(label, affine), label_file) | ||
|
|
||
| return [{"image": image_file, "label": label_file}] | ||
|
|
||
| def test_create_dataset_2d(self): | ||
| with tempfile.TemporaryDirectory() as tempdir: | ||
| datalist = self._create_data(tempdir) | ||
| output_dir = os.path.join(tempdir, "2d") | ||
| deepgrow_datalist = create_dataset(datalist=datalist, output_dir=output_dir, dimension=2, pixdim=(1, 1)) | ||
| self.assertEqual(len(deepgrow_datalist), 3) | ||
| self.assertEqual(deepgrow_datalist[0]["region"], 1) | ||
|
|
||
| def test_create_dataset_3d(self): | ||
| with tempfile.TemporaryDirectory() as tempdir: | ||
| datalist = self._create_data(tempdir) | ||
| output_dir = os.path.join(tempdir, "3d") | ||
| deepgrow_datalist = create_dataset(datalist=datalist, output_dir=output_dir, dimension=3, pixdim=(1, 1, 1)) | ||
| self.assertEqual(len(deepgrow_datalist), 1) | ||
| self.assertEqual(deepgrow_datalist[0]["region"], 1) | ||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| unittest.main() |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.