Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions monai/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
compute_shape_offset,
correct_nifti_header_if_necessary,
create_file_basename,
decollate_batch,
dense_patch_slices,
get_random_patch,
get_valid_patch_size,
Expand Down
69 changes: 68 additions & 1 deletion monai/data/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from collections import defaultdict
from itertools import product, starmap
from pathlib import PurePath
from typing import Dict, Generator, Iterable, List, Optional, Sequence, Tuple, Union
from typing import Any, Dict, Generator, Iterable, List, Optional, Sequence, Tuple, Union

import numpy as np
import torch
Expand All @@ -37,6 +37,7 @@
optional_import,
)
from monai.utils.enums import Method
from monai.utils.misc import issequenceiterable

nib, _ = optional_import("nibabel")

Expand Down Expand Up @@ -64,6 +65,7 @@
"json_hashing",
"pickle_hashing",
"sorted_dict",
"decollate_batch",
"pad_list_data_collate",
]

Expand Down Expand Up @@ -255,6 +257,71 @@ def list_data_collate(batch: Sequence):
raise RuntimeError(re_str)


def decollate_batch(data: dict, batch_size: Optional[int] = None) -> List[dict]:
"""De-collate a batch of data (for example, as produced by a `DataLoader`).

Returns a list of dictionaries. Each dictionary will only contain the data for a given batch.

Images originally stored as (B,C,H,W,[D]) will be returned as (C,H,W,[D]). Other information,
such as metadata, may have been stored in a list (or a list inside nested dictionaries). In
this case we return the element of the list corresponding to the batch idx.

Return types aren't guaranteed to be the same as the original, since numpy arrays will have been
converted to torch.Tensor, and tuples/lists may have been converted to lists of tensors

For example:

.. code-block:: python

batch_data = {
"image": torch.rand((2,1,10,10)),
"image_meta_dict": {"scl_slope": torch.Tensor([0.0, 0.0])}
}
out = decollate_batch(batch_data)
print(len(out))
>>> 2

print(out[0])
>>> {'image': tensor([[[4.3549e-01...43e-01]]]), 'image_meta_dict': {'scl_slope': 0.0}}

Args:
data: data to be de-collated.
batch_size: number of batches in data. If `None` is passed, try to figure out batch size.
"""
if not isinstance(data, dict):
raise RuntimeError("Only currently implemented for dictionary data (might be trivial to adapt).")
if batch_size is None:
for v in data.values():
if isinstance(v, torch.Tensor):
batch_size = v.shape[0]
break
if batch_size is None:
raise RuntimeError("Couldn't determine batch size, please specify as argument.")

def torch_to_single(d: torch.Tensor):
"""If input is a torch.Tensor with only 1 element, return just the element."""
return d if d.numel() > 1 else d.item()

def decollate(data: Any, idx: int):
"""Recursively de-collate."""
if isinstance(data, dict):
return {k: decollate(v, idx) for k, v in data.items()}
if isinstance(data, torch.Tensor):
out = data[idx]
return torch_to_single(out)
elif isinstance(data, list):
if len(data) == 0:
return data
if isinstance(data[0], torch.Tensor):
return [torch_to_single(d[idx]) for d in data]
if issequenceiterable(data[0]):
return [decollate(d, idx) for d in data]
return data[idx]
raise TypeError(f"Not sure how to de-collate type: {type(data)}")

return [{key: decollate(data[key], idx) for key in data.keys()} for idx in range(batch_size)]


def pad_list_data_collate(
batch: Sequence,
method: Union[Method, str] = Method.SYMMETRIC,
Expand Down
3 changes: 3 additions & 0 deletions monai/transforms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,9 @@
AsDiscreted,
AsDiscreteD,
AsDiscreteDict,
Decollated,
DecollateD,
DecollateDict,
Ensembled,
KeepLargestConnectedComponentd,
KeepLargestConnectedComponentD,
Expand Down
23 changes: 23 additions & 0 deletions monai/transforms/post/dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import numpy as np
import torch

import monai.data
from monai.config import KeysCollection
from monai.transforms.post.array import (
Activations,
Expand Down Expand Up @@ -52,6 +53,9 @@
"MeanEnsembleDict",
"VoteEnsembleD",
"VoteEnsembleDict",
"DecollateD",
"DecollateDict",
"Decollated",
]


Expand Down Expand Up @@ -306,9 +310,28 @@ def __init__(
super().__init__(keys, ensemble, output_key)


class Decollated(MapTransform):
"""
Decollate a batch of data.

Note that unlike most MapTransforms, this will decollate all data, so keys are not needed.

Args:
batch_size: if not supplied, we try to determine it based on array lengths. Will raise an error if
it fails to determine it automatically.
"""

def __init__(self, batch_size: Optional[int] = None) -> None:
self.batch_size = batch_size

def __call__(self, data: dict) -> List[dict]:
return monai.data.decollate_batch(data, self.batch_size)


ActivationsD = ActivationsDict = Activationsd
AsDiscreteD = AsDiscreteDict = AsDiscreted
KeepLargestConnectedComponentD = KeepLargestConnectedComponentDict = KeepLargestConnectedComponentd
LabelToContourD = LabelToContourDict = LabelToContourd
MeanEnsembleD = MeanEnsembleDict = MeanEnsembled
VoteEnsembleD = VoteEnsembleDict = VoteEnsembled
DecollateD = DecollateDict = Decollated
88 changes: 88 additions & 0 deletions tests/test_decollate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Copyright 2020 - 2021 MONAI Consortium
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import numpy as np
import torch
from parameterized import parameterized

from monai.data import CacheDataset, DataLoader, create_test_image_2d
from monai.data.utils import decollate_batch
from monai.transforms import AddChanneld, Compose, LoadImaged, RandFlipd, SpatialPadd, ToTensord
from monai.transforms.post.dictionary import Decollated
from monai.utils import optional_import, set_determinism
from tests.utils import make_nifti_image

_, has_nib = optional_import("nibabel")

IM_2D = create_test_image_2d(100, 101)[0]
DATA_2D = {"image": make_nifti_image(IM_2D) if has_nib else IM_2D}

TESTS = []
TESTS.append(
(
"2D",
[DATA_2D for _ in range(6)],
)
)


class TestDeCollate(unittest.TestCase):
def setUp(self) -> None:
set_determinism(seed=0)

def tearDown(self) -> None:
set_determinism(None)

def check_match(self, in1, in2):
if isinstance(in1, dict):
self.assertTrue(isinstance(in2, dict))
self.check_match(list(in1.keys()), list(in2.keys()))
self.check_match(list(in1.values()), list(in2.values()))
elif any(isinstance(in1, i) for i in [list, tuple]):
for l1, l2 in zip(in1, in2):
self.check_match(l1, l2)
elif any(isinstance(in1, i) for i in [str, int]):
self.assertEqual(in1, in2)
elif any(isinstance(in1, i) for i in [torch.Tensor, np.ndarray]):
np.testing.assert_array_equal(in1, in2)
else:
raise RuntimeError(f"Not sure how to compare types. type(in1): {type(in1)}, type(in2): {type(in2)}")

@parameterized.expand(TESTS)
def test_decollation(self, _, data, batch_size=2, num_workers=2):
transforms = Compose(
[
AddChanneld("image"),
SpatialPadd("image", 150),
RandFlipd("image", prob=1.0, spatial_axis=1),
ToTensord("image"),
]
)
# If nibabel present, read from disk
if has_nib:
transforms = Compose([LoadImaged("image"), transforms])

dataset = CacheDataset(data, transforms, progress=False)
loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

for b, batch_data in enumerate(loader):
decollated_1 = decollate_batch(batch_data)
decollated_2 = Decollated()(batch_data)

for decollated in [decollated_1, decollated_2]:
for i, d in enumerate(decollated):
self.check_match(dataset[b * batch_size + i], d)


if __name__ == "__main__":
unittest.main()