From 46a77253ef2b68e12ef089ef9e102b10a973153a Mon Sep 17 00:00:00 2001 From: heyufan1995 Date: Fri, 17 Mar 2023 11:26:31 -0400 Subject: [PATCH 01/18] Add multi-gpu data analyzer --- monai/apps/auto3dseg/data_analyzer.py | 116 +++++++++++++++----------- 1 file changed, 67 insertions(+), 49 deletions(-) diff --git a/monai/apps/auto3dseg/data_analyzer.py b/monai/apps/auto3dseg/data_analyzer.py index 3bb67bdbe2..c68b0f620d 100644 --- a/monai/apps/auto3dseg/data_analyzer.py +++ b/monai/apps/auto3dseg/data_analyzer.py @@ -17,6 +17,7 @@ import numpy as np import torch +from torch.multiprocessing import Process, set_start_method, Manager from monai.apps.auto3dseg.transforms import EnsureSameShaped from monai.apps.utils import get_logger @@ -24,12 +25,15 @@ from monai.auto3dseg.utils import datafold_read from monai.bundle import config_parser from monai.bundle.config_parser import ConfigParser -from monai.data import DataLoader, Dataset +from monai.data import DataLoader, Dataset, partition_dataset from monai.data.utils import no_collation from monai.transforms import Compose, EnsureTyped, LoadImaged, Orientationd from monai.utils import StrEnum, min_version, optional_import from monai.utils.enums import DataStatsKeys, ImageStatsKeys +import warnings +# remove the warning "warnings.warn(f"Modifying image pixdim from {pixdim} to {norm}")" +warnings.filterwarnings("ignore", category=UserWarning, module='monai') def strenum_representer(dumper, data): return dumper.represent_scalar("tag:yaml.org,2002:str", data.value) @@ -115,10 +119,10 @@ def __init__( self, datalist: str | dict, dataroot: str = "", - output_path: str = "./datastats.yaml", + output_path: str = "./data_stats.yaml", average: bool = True, do_ccp: bool = False, - device: str | torch.device = "cpu", + device: str | torch.device = "cuda", worker: int = 4, image_key: str = "image", label_key: str | None = "label", @@ -169,6 +173,58 @@ def _check_data_uniformity(keys: list[str], result: dict) -> bool: return True def get_all_case_stats(self, key="training", transform_list=None): + """ Wrapper for the internal _get_all_case_stats to perform multi-gpu processing + """ + if self.device.type == 'cpu': + nprocs = 1 + print(f'Using CPU for data analyzing!') + else: + nprocs = torch.cuda.device_count() + print(f'Found {nprocs} GPUs for data analyzing!') + set_start_method('forkserver', force=True) + with Manager() as manager: + manager_list = manager.list() + processes = [] + for rank in range(nprocs): + p = Process(target=self._get_all_case_stats, args=(rank, nprocs, manager_list, key, transform_list)) + processes.append(p) + for p in processes: + p.start() + for p in processes: + p.join() + # merge DataStatsKeys.BY_CASE + result: dict[DataStatsKeys, Any] = {DataStatsKeys.SUMMARY: {}, DataStatsKeys.BY_CASE: []} + for _ in manager_list: + result[DataStatsKeys.BY_CASE].extend(_[DataStatsKeys.BY_CASE]) + summarizer = SegSummarizer( + self.image_key, + self.label_key, + average=self.average, + do_ccp=self.do_ccp, + hist_bins=self.hist_bins, + hist_range=self.hist_range, + histogram_only=self.histogram_only, + ) + result[DataStatsKeys.SUMMARY] = summarizer.summarize(cast(list, result[DataStatsKeys.BY_CASE])) + + if not self._check_data_uniformity([ImageStatsKeys.SPACING], result): + print("Data spacing is not completely uniform. MONAI transforms may provide unexpected result") + + if self.output_path: + ConfigParser.export_config_file( + result, self.output_path, fmt=self.fmt, default_flow_style=None, sort_keys=False + ) + + # release memory + d = None + if self.device.type == "cuda": + # release unreferenced tensors to mitigate OOM + # limitation: https://github.com/pytorch/pytorch/issues/12873#issuecomment-482916237 + torch.cuda.empty_cache() + + return result + + def _get_all_case_stats(self, rank: int=0, world_size: int=1, manager_list: Manager.list=[], key="training", transform_list=None): """ Get all case stats. Caller of the DataAnalyser class. The function iterates datalist and call get_case_stats to generate stats. Then get_case_summary is called to combine results. @@ -224,31 +280,23 @@ def get_all_case_stats(self, key="training", transform_list=None): ) transform = Compose(transform_list) - files, _ = datafold_read(datalist=self.datalist, basedir=self.dataroot, fold=-1, key=key) + files = partition_dataset(data=files, num_partitions=world_size)[rank] dataset = Dataset(data=files, transform=transform) - dataloader = DataLoader( - dataset, - batch_size=1, - shuffle=False, - num_workers=self.worker, - collate_fn=no_collation, - pin_memory=self.device.type == "cuda", - ) + dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=self.worker, collate_fn=no_collation) result: dict[DataStatsKeys, Any] = {DataStatsKeys.SUMMARY: {}, DataStatsKeys.BY_CASE: []} - result_bycase: dict[DataStatsKeys, Any] = {DataStatsKeys.SUMMARY: {}, DataStatsKeys.BY_CASE: []} - + device = self.device if self.device.type == 'cpu' else torch.device(f'cuda',rank) if not has_tqdm: warnings.warn("tqdm is not installed. not displaying the caching progress.") - for batch_data in tqdm(dataloader) if has_tqdm else dataloader: + for batch_data in tqdm(dataloader) if (has_tqdm and rank==0) else dataloader: batch_data = batch_data[0] - batch_data[self.image_key] = batch_data[self.image_key].to(self.device) + batch_data[self.image_key] = batch_data[self.image_key].to(device) if self.label_key is not None: label = batch_data[self.label_key] label = torch.argmax(label, dim=0) if label.shape[0] > 1 else label[0] - batch_data[self.label_key] = label.to(self.device) + batch_data[self.label_key] = label.to(device) d = summarizer(batch_data) @@ -267,37 +315,7 @@ def get_all_case_stats(self, key="training", transform_list=None): DataStatsKeys.LABEL_STATS: d[DataStatsKeys.LABEL_STATS], } ) - result_bycase[DataStatsKeys.BY_CASE].append(stats_by_cases) - - n_cases = len(result_bycase[DataStatsKeys.BY_CASE]) - - result[DataStatsKeys.SUMMARY] = summarizer.summarize(cast(list, result_bycase[DataStatsKeys.BY_CASE])) - result[DataStatsKeys.SUMMARY]["n_cases"] = n_cases - result[DataStatsKeys.BY_CASE] = [None] * n_cases - - if not self._check_data_uniformity([ImageStatsKeys.SPACING], result): - print("Data spacing is not completely uniform. MONAI transforms may provide unexpected result") - - if self.output_path: - # saving summary and by_case as 2 files, to minimize loading time when only the summary is necessary - ConfigParser.export_config_file( - result, self.output_path, fmt=self.fmt, default_flow_style=None, sort_keys=False - ) - ConfigParser.export_config_file( - result_bycase, - self.output_path.replace(".yaml", "_by_case.yaml"), - fmt=self.fmt, - default_flow_style=None, - sort_keys=False, - ) + result[DataStatsKeys.BY_CASE].append(stats_by_cases) + manager_list.append(result) - # release memory - d = None - if self.device.type == "cuda": - # release unreferenced tensors to mitigate OOM - # limitation: https://github.com/pytorch/pytorch/issues/12873#issuecomment-482916237 - torch.cuda.empty_cache() - # return combined - result[DataStatsKeys.BY_CASE] = result_bycase[DataStatsKeys.BY_CASE] - return result From 6f4a0ccd7d1d0be7712fb6595fc7f176ccceab49 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 20 Mar 2023 17:58:19 +0000 Subject: [PATCH 02/18] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- monai/apps/auto3dseg/data_analyzer.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/monai/apps/auto3dseg/data_analyzer.py b/monai/apps/auto3dseg/data_analyzer.py index c68b0f620d..1729face53 100644 --- a/monai/apps/auto3dseg/data_analyzer.py +++ b/monai/apps/auto3dseg/data_analyzer.py @@ -181,7 +181,7 @@ def get_all_case_stats(self, key="training", transform_list=None): else: nprocs = torch.cuda.device_count() print(f'Found {nprocs} GPUs for data analyzing!') - set_start_method('forkserver', force=True) + set_start_method('forkserver', force=True) with Manager() as manager: manager_list = manager.list() processes = [] @@ -317,5 +317,3 @@ def _get_all_case_stats(self, rank: int=0, world_size: int=1, manager_list: Mana ) result[DataStatsKeys.BY_CASE].append(stats_by_cases) manager_list.append(result) - - From f7489270dd53df570b0fb4c2a2000a4e673462e7 Mon Sep 17 00:00:00 2001 From: heyufan1995 Date: Tue, 21 Mar 2023 22:11:26 -0400 Subject: [PATCH 03/18] Update multi-gpu data analyzer Signed-off-by: heyufan1995 --- monai/apps/auto3dseg/data_analyzer.py | 131 ++++++++++++++++---------- 1 file changed, 81 insertions(+), 50 deletions(-) diff --git a/monai/apps/auto3dseg/data_analyzer.py b/monai/apps/auto3dseg/data_analyzer.py index 1729face53..cb1d2acf6a 100644 --- a/monai/apps/auto3dseg/data_analyzer.py +++ b/monai/apps/auto3dseg/data_analyzer.py @@ -65,8 +65,7 @@ class DataAnalyzer: average: whether to average the statistical value across different image modalities. do_ccp: apply the connected component algorithm to process the labels/images device: a string specifying hardware (CUDA/CPU) utilized for the operations. - worker: number of workers to use for parallel processing. If device is cuda/GPU, worker has - to be 0. + worker: number of workers to use for loading datasets in each GPU/CPU sub-process. image_key: a string that user specify for the image. The DataAnalyzer will look it up in the datalist to locate the image files of the dataset. label_key: a string that user specify for the label. The DataAnalyzer will look it up in the @@ -119,7 +118,7 @@ def __init__( self, datalist: str | dict, dataroot: str = "", - output_path: str = "./data_stats.yaml", + output_path: str = "./datastats.yaml", average: bool = True, do_ccp: bool = False, device: str | torch.device = "cuda", @@ -173,14 +172,41 @@ def _check_data_uniformity(keys: list[str], result: dict) -> bool: return True def get_all_case_stats(self, key="training", transform_list=None): - """ Wrapper for the internal _get_all_case_stats to perform multi-gpu processing + """ + Get all case stats. Caller of the DataAnalyser class. The function initiates multiple GPU or CPU processes of the internal + _get_all_case_stats functions, which iterates datalist and call SegSummarizer to generate stats for each case. + After all case stats are generated, SegSummarizer is called to combine results. + + Args: + key: dataset key + transform_list: option list of transforms before SegSummarizer + + Returns: + A data statistics dictionary containing + "stats_summary" (summary statistics of the entire datasets). Within stats_summary + there are "image_stats" (summarizing info of shape, channel, spacing, and etc + using operations_summary), "image_foreground_stats" (info of the intensity for the + non-zero labeled voxels), and "label_stats" (info of the labels, pixel percentage, + image_intensity, and each individual label in a list) + "stats_by_cases" (List type value. Each element of the list is statistics of + an image-label info. Within each element, there are: "image" (value is the + path to an image), "label" (value is the path to the corresponding label), "image_stats" + (summarizing info of shape, channel, spacing, and etc using operations), + "image_foreground_stats" (similar to the previous one but one foreground image), and + "label_stats" (stats of the individual labels ) + + Notes: + Since the backend of the statistics computation are torch/numpy, nan/inf value + may be generated and carried over in the computation. In such cases, the output + dictionary will include .nan/.inf in the statistics. + """ if self.device.type == 'cpu': nprocs = 1 - print(f'Using CPU for data analyzing!') + logger.info(f'Using CPU for data analyzing!') else: nprocs = torch.cuda.device_count() - print(f'Found {nprocs} GPUs for data analyzing!') + logger.info(f'Found {nprocs} GPUs for data analyzing!') set_start_method('forkserver', force=True) with Manager() as manager: manager_list = manager.list() @@ -194,8 +220,9 @@ def get_all_case_stats(self, key="training", transform_list=None): p.join() # merge DataStatsKeys.BY_CASE result: dict[DataStatsKeys, Any] = {DataStatsKeys.SUMMARY: {}, DataStatsKeys.BY_CASE: []} + result_bycase: dict[DataStatsKeys, Any] = {DataStatsKeys.SUMMARY: {}, DataStatsKeys.BY_CASE: []} for _ in manager_list: - result[DataStatsKeys.BY_CASE].extend(_[DataStatsKeys.BY_CASE]) + result_bycase[DataStatsKeys.BY_CASE].extend(_[DataStatsKeys.BY_CASE]) summarizer = SegSummarizer( self.image_key, self.label_key, @@ -205,53 +232,39 @@ def get_all_case_stats(self, key="training", transform_list=None): hist_range=self.hist_range, histogram_only=self.histogram_only, ) - result[DataStatsKeys.SUMMARY] = summarizer.summarize(cast(list, result[DataStatsKeys.BY_CASE])) - + n_cases = len(result_bycase[DataStatsKeys.BY_CASE]) + result[DataStatsKeys.SUMMARY] = summarizer.summarize(cast(list, result_bycase[DataStatsKeys.BY_CASE])) + result[DataStatsKeys.SUMMARY]["n_cases"] = n_cases + result_bycase[DataStatsKeys.SUMMARY] = result[DataStatsKeys.SUMMARY] if not self._check_data_uniformity([ImageStatsKeys.SPACING], result): - print("Data spacing is not completely uniform. MONAI transforms may provide unexpected result") + logger.info("Data spacing is not completely uniform. MONAI transforms may provide unexpected result") if self.output_path: ConfigParser.export_config_file( result, self.output_path, fmt=self.fmt, default_flow_style=None, sort_keys=False ) - + ConfigParser.export_config_file( + result_bycase, + self.output_path.replace(".yaml", "_by_case.yaml"), + fmt=self.fmt, + default_flow_style=None, + sort_keys=False, + ) # release memory d = None if self.device.type == "cuda": # release unreferenced tensors to mitigate OOM # limitation: https://github.com/pytorch/pytorch/issues/12873#issuecomment-482916237 torch.cuda.empty_cache() - + result[DataStatsKeys.BY_CASE] = result_bycase[DataStatsKeys.BY_CASE] return result def _get_all_case_stats(self, rank: int=0, world_size: int=1, manager_list: Manager.list=[], key="training", transform_list=None): """ - Get all case stats. Caller of the DataAnalyser class. The function iterates datalist and - call get_case_stats to generate stats. Then get_case_summary is called to combine results. - + Get all case stats from a partitioned datalist. The function can only be called internally by get_all_case_stats. Args: key: dataset key transform_list: option list of transforms before SegSummarizer - - Returns: - A data statistics dictionary containing - "stats_summary" (summary statistics of the entire datasets). Within stats_summary - there are "image_stats" (summarizing info of shape, channel, spacing, and etc - using operations_summary), "image_foreground_stats" (info of the intensity for the - non-zero labeled voxels), and "label_stats" (info of the labels, pixel percentage, - image_intensity, and each individual label in a list) - "stats_by_cases" (List type value. Each element of the list is statistics of - an image-label info. Within each element, there are: "image" (value is the - path to an image), "label" (value is the path to the corresponding label), "image_stats" - (summarizing info of shape, channel, spacing, and etc using operations), - "image_foreground_stats" (similar to the previous one but one foreground image), and - "label_stats" (stats of the individual labels ) - - Notes: - Since the backend of the statistics computation are torch/numpy, nan/inf value - may be generated and carried over in the computation. In such cases, the output - dictionary will include .nan/.inf in the statistics. - """ summarizer = SegSummarizer( self.image_key, @@ -265,7 +278,7 @@ def _get_all_case_stats(self, rank: int=0, world_size: int=1, manager_list: Mana keys = list(filter(None, [self.image_key, self.label_key])) if transform_list is None: transform_list = [ - LoadImaged(keys=keys, ensure_channel_first=True, image_only=True), + LoadImaged(keys=keys, ensure_channel_first=True, image_only=False), EnsureTyped(keys=keys, data_type="tensor", dtype=torch.float), Orientationd(keys=keys, axcodes="RAS"), ] @@ -281,25 +294,43 @@ def _get_all_case_stats(self, rank: int=0, world_size: int=1, manager_list: Mana transform = Compose(transform_list) files, _ = datafold_read(datalist=self.datalist, basedir=self.dataroot, fold=-1, key=key) - files = partition_dataset(data=files, num_partitions=world_size)[rank] + if world_size <= len(files): + files = partition_dataset(data=files, num_partitions=world_size)[rank] + else: + files = partition_dataset(data=files, num_partitions=len(files))[rank] if rank < len(files) else [] dataset = Dataset(data=files, transform=transform) - dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=self.worker, collate_fn=no_collation) - result: dict[DataStatsKeys, Any] = {DataStatsKeys.SUMMARY: {}, DataStatsKeys.BY_CASE: []} + dataloader = DataLoader( + dataset, + batch_size=1, + shuffle=False, + num_workers=self.worker, + collate_fn=no_collation, + pin_memory=self.device.type == "cuda", + ) + result_bycase: dict[DataStatsKeys, Any] = {DataStatsKeys.SUMMARY: {}, DataStatsKeys.BY_CASE: []} device = self.device if self.device.type == 'cpu' else torch.device(f'cuda',rank) if not has_tqdm: warnings.warn("tqdm is not installed. not displaying the caching progress.") for batch_data in tqdm(dataloader) if (has_tqdm and rank==0) else dataloader: batch_data = batch_data[0] - batch_data[self.image_key] = batch_data[self.image_key].to(device) - - if self.label_key is not None: - label = batch_data[self.label_key] - label = torch.argmax(label, dim=0) if label.shape[0] > 1 else label[0] - batch_data[self.label_key] = label.to(device) - - d = summarizer(batch_data) - + try: + batch_data[self.image_key] = batch_data[self.image_key].to(device) + if self.label_key is not None: + label = batch_data[self.label_key] + label = torch.argmax(label, dim=0) if label.shape[0] > 1 else label[0] + batch_data[self.label_key] = label.to(device) + + d = summarizer(batch_data) + except: + logger.info(f"Unable to process data {batch_data['image_meta_dict']['filename_or_obj']} on {device}.") + if self.device.type == 'cuda': + logger.info(f"Data analysis using CPU.") + batch_data[self.image_key] = batch_data[self.image_key].to('cpu') + if self.label_key is not None: + batch_data[self.label_key] = label.to('cpu') + d = summarizer(batch_data) + stats_by_cases = { DataStatsKeys.BY_CASE_IMAGE_PATH: d[DataStatsKeys.BY_CASE_IMAGE_PATH], DataStatsKeys.BY_CASE_LABEL_PATH: d[DataStatsKeys.BY_CASE_LABEL_PATH], @@ -315,5 +346,5 @@ def _get_all_case_stats(self, rank: int=0, world_size: int=1, manager_list: Mana DataStatsKeys.LABEL_STATS: d[DataStatsKeys.LABEL_STATS], } ) - result[DataStatsKeys.BY_CASE].append(stats_by_cases) - manager_list.append(result) + result_bycase[DataStatsKeys.BY_CASE].append(stats_by_cases) + manager_list.append(result_bycase) From 0b71a767db2528c9a650b915fbd15e32354fc71f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 22 Mar 2023 02:46:24 +0000 Subject: [PATCH 04/18] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- monai/apps/auto3dseg/data_analyzer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/monai/apps/auto3dseg/data_analyzer.py b/monai/apps/auto3dseg/data_analyzer.py index cb1d2acf6a..da156998c3 100644 --- a/monai/apps/auto3dseg/data_analyzer.py +++ b/monai/apps/auto3dseg/data_analyzer.py @@ -173,8 +173,8 @@ def _check_data_uniformity(keys: list[str], result: dict) -> bool: def get_all_case_stats(self, key="training", transform_list=None): """ - Get all case stats. Caller of the DataAnalyser class. The function initiates multiple GPU or CPU processes of the internal - _get_all_case_stats functions, which iterates datalist and call SegSummarizer to generate stats for each case. + Get all case stats. Caller of the DataAnalyser class. The function initiates multiple GPU or CPU processes of the internal + _get_all_case_stats functions, which iterates datalist and call SegSummarizer to generate stats for each case. After all case stats are generated, SegSummarizer is called to combine results. Args: @@ -330,7 +330,7 @@ def _get_all_case_stats(self, rank: int=0, world_size: int=1, manager_list: Mana if self.label_key is not None: batch_data[self.label_key] = label.to('cpu') d = summarizer(batch_data) - + stats_by_cases = { DataStatsKeys.BY_CASE_IMAGE_PATH: d[DataStatsKeys.BY_CASE_IMAGE_PATH], DataStatsKeys.BY_CASE_LABEL_PATH: d[DataStatsKeys.BY_CASE_LABEL_PATH], From e556ec27fe7dcda9a60815d3a2bb04371df23dc2 Mon Sep 17 00:00:00 2001 From: heyufan1995 Date: Fri, 24 Mar 2023 13:16:45 -0400 Subject: [PATCH 05/18] Ignore non-json/yaml configs for BundleAlg Signed-off-by: heyufan1995 --- monai/apps/auto3dseg/bundle_gen.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/monai/apps/auto3dseg/bundle_gen.py b/monai/apps/auto3dseg/bundle_gen.py index 1831e9f840..a71c3a90cd 100644 --- a/monai/apps/auto3dseg/bundle_gen.py +++ b/monai/apps/auto3dseg/bundle_gen.py @@ -155,6 +155,8 @@ def _create_cmd(self, train_params=None): if os.path.isdir(config_dir): base_cmd = "" for file in os.listdir(config_dir): + if not (file.endswith('yaml') or file.endswith('json')): + continue if len(base_cmd) == 0: base_cmd += f"{train_py} run --config_file=" else: From 033f14d6dadc0dc156e5cbb57507d508b5ef83bc Mon Sep 17 00:00:00 2001 From: heyufan1995 Date: Mon, 27 Mar 2023 15:06:00 -0400 Subject: [PATCH 06/18] Skip multiprocessing init with cpu/single gpu Signed-off-by: heyufan1995 --- monai/apps/auto3dseg/data_analyzer.py | 116 ++++++++++++++------------ 1 file changed, 63 insertions(+), 53 deletions(-) diff --git a/monai/apps/auto3dseg/data_analyzer.py b/monai/apps/auto3dseg/data_analyzer.py index da156998c3..1e6f87efae 100644 --- a/monai/apps/auto3dseg/data_analyzer.py +++ b/monai/apps/auto3dseg/data_analyzer.py @@ -201,68 +201,75 @@ def get_all_case_stats(self, key="training", transform_list=None): dictionary will include .nan/.inf in the statistics. """ + result: dict[DataStatsKeys, Any] = {DataStatsKeys.SUMMARY: {}, DataStatsKeys.BY_CASE: []} + result_bycase: dict[DataStatsKeys, Any] = {DataStatsKeys.SUMMARY: {}, DataStatsKeys.BY_CASE: []} if self.device.type == 'cpu': nprocs = 1 logger.info(f'Using CPU for data analyzing!') else: nprocs = torch.cuda.device_count() logger.info(f'Found {nprocs} GPUs for data analyzing!') - set_start_method('forkserver', force=True) - with Manager() as manager: - manager_list = manager.list() - processes = [] - for rank in range(nprocs): - p = Process(target=self._get_all_case_stats, args=(rank, nprocs, manager_list, key, transform_list)) - processes.append(p) - for p in processes: - p.start() - for p in processes: - p.join() - # merge DataStatsKeys.BY_CASE - result: dict[DataStatsKeys, Any] = {DataStatsKeys.SUMMARY: {}, DataStatsKeys.BY_CASE: []} - result_bycase: dict[DataStatsKeys, Any] = {DataStatsKeys.SUMMARY: {}, DataStatsKeys.BY_CASE: []} - for _ in manager_list: - result_bycase[DataStatsKeys.BY_CASE].extend(_[DataStatsKeys.BY_CASE]) - summarizer = SegSummarizer( - self.image_key, - self.label_key, - average=self.average, - do_ccp=self.do_ccp, - hist_bins=self.hist_bins, - hist_range=self.hist_range, - histogram_only=self.histogram_only, + if nprocs > 1: + set_start_method('forkserver', force=True) + with Manager() as manager: + manager_list = manager.list() + processes = [] + for rank in range(nprocs): + p = Process(target=self._get_all_case_stats, args=(rank, nprocs, manager_list, key, transform_list)) + processes.append(p) + print('mp time', time.time() - start) + for p in processes: + p.start() + for p in processes: + p.join() + # merge DataStatsKeys.BY_CASE + for _ in manager_list: + result_bycase[DataStatsKeys.BY_CASE].extend(_[DataStatsKeys.BY_CASE]) + else: + result_bycase = self._get_all_case_stats(0, 1, None, key, transform_list) + + summarizer = SegSummarizer( + self.image_key, + self.label_key, + average=self.average, + do_ccp=self.do_ccp, + hist_bins=self.hist_bins, + hist_range=self.hist_range, + histogram_only=self.histogram_only, + ) + n_cases = len(result_bycase[DataStatsKeys.BY_CASE]) + result[DataStatsKeys.SUMMARY] = summarizer.summarize(cast(list, result_bycase[DataStatsKeys.BY_CASE])) + result[DataStatsKeys.SUMMARY]["n_cases"] = n_cases + result_bycase[DataStatsKeys.SUMMARY] = result[DataStatsKeys.SUMMARY] + if not self._check_data_uniformity([ImageStatsKeys.SPACING], result): + logger.info("Data spacing is not completely uniform. MONAI transforms may provide unexpected result") + if self.output_path: + ConfigParser.export_config_file( + result, self.output_path, fmt=self.fmt, default_flow_style=None, sort_keys=False ) - n_cases = len(result_bycase[DataStatsKeys.BY_CASE]) - result[DataStatsKeys.SUMMARY] = summarizer.summarize(cast(list, result_bycase[DataStatsKeys.BY_CASE])) - result[DataStatsKeys.SUMMARY]["n_cases"] = n_cases - result_bycase[DataStatsKeys.SUMMARY] = result[DataStatsKeys.SUMMARY] - if not self._check_data_uniformity([ImageStatsKeys.SPACING], result): - logger.info("Data spacing is not completely uniform. MONAI transforms may provide unexpected result") - - if self.output_path: - ConfigParser.export_config_file( - result, self.output_path, fmt=self.fmt, default_flow_style=None, sort_keys=False - ) - ConfigParser.export_config_file( - result_bycase, - self.output_path.replace(".yaml", "_by_case.yaml"), - fmt=self.fmt, - default_flow_style=None, - sort_keys=False, - ) - # release memory - d = None - if self.device.type == "cuda": - # release unreferenced tensors to mitigate OOM - # limitation: https://github.com/pytorch/pytorch/issues/12873#issuecomment-482916237 - torch.cuda.empty_cache() - result[DataStatsKeys.BY_CASE] = result_bycase[DataStatsKeys.BY_CASE] - return result - - def _get_all_case_stats(self, rank: int=0, world_size: int=1, manager_list: Manager.list=[], key="training", transform_list=None): + ConfigParser.export_config_file( + result_bycase, + self.output_path.replace(".yaml", "_by_case.yaml"), + fmt=self.fmt, + default_flow_style=None, + sort_keys=False, + ) + # release memory + d = None + if self.device.type == "cuda": + # release unreferenced tensors to mitigate OOM + # limitation: https://github.com/pytorch/pytorch/issues/12873#issuecomment-482916237 + torch.cuda.empty_cache() + result[DataStatsKeys.BY_CASE] = result_bycase[DataStatsKeys.BY_CASE] + return result + + def _get_all_case_stats(self, rank: int=0, world_size: int=1, manager_list=None, key="training", transform_list=None): """ Get all case stats from a partitioned datalist. The function can only be called internally by get_all_case_stats. Args: + rank: GPU process rank, 0 for CPU process + world_size: total number of GPUs, 1 for CPU process + manager_list: multiprocessing manager list object, if using multi-GPU. key: dataset key transform_list: option list of transforms before SegSummarizer """ @@ -347,4 +354,7 @@ def _get_all_case_stats(self, rank: int=0, world_size: int=1, manager_list: Mana } ) result_bycase[DataStatsKeys.BY_CASE].append(stats_by_cases) - manager_list.append(result_bycase) + if manager_list is None: + return result_bycase + else: + manager_list.append(result_bycase) From a9689fa2c3b03bc155d1d2c4087b94b187364685 Mon Sep 17 00:00:00 2001 From: heyufan1995 Date: Mon, 27 Mar 2023 15:11:42 -0400 Subject: [PATCH 07/18] Remove warning filter Signed-off-by: heyufan1995 --- monai/apps/auto3dseg/data_analyzer.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/monai/apps/auto3dseg/data_analyzer.py b/monai/apps/auto3dseg/data_analyzer.py index 1e6f87efae..2770fe783f 100644 --- a/monai/apps/auto3dseg/data_analyzer.py +++ b/monai/apps/auto3dseg/data_analyzer.py @@ -31,10 +31,6 @@ from monai.utils import StrEnum, min_version, optional_import from monai.utils.enums import DataStatsKeys, ImageStatsKeys -import warnings -# remove the warning "warnings.warn(f"Modifying image pixdim from {pixdim} to {norm}")" -warnings.filterwarnings("ignore", category=UserWarning, module='monai') - def strenum_representer(dumper, data): return dumper.represent_scalar("tag:yaml.org,2002:str", data.value) From e2ebaacf6eaed6d75915cb9b4cccafb0b73abb72 Mon Sep 17 00:00:00 2001 From: heyufan1995 Date: Mon, 3 Apr 2023 11:15:02 -0400 Subject: [PATCH 08/18] Fix label reference error for data analyzer Signed-off-by: heyufan1995 --- monai/apps/auto3dseg/data_analyzer.py | 4 +++- try.py | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 try.py diff --git a/monai/apps/auto3dseg/data_analyzer.py b/monai/apps/auto3dseg/data_analyzer.py index 2770fe783f..02fef9c9e0 100644 --- a/monai/apps/auto3dseg/data_analyzer.py +++ b/monai/apps/auto3dseg/data_analyzer.py @@ -328,9 +328,11 @@ def _get_all_case_stats(self, rank: int=0, world_size: int=1, manager_list=None, except: logger.info(f"Unable to process data {batch_data['image_meta_dict']['filename_or_obj']} on {device}.") if self.device.type == 'cuda': - logger.info(f"Data analysis using CPU.") + logger.info(f"DataAnalyzer `device` was set to use GPU but the execution hit an exception. Falling back to use `cpu`.") batch_data[self.image_key] = batch_data[self.image_key].to('cpu') if self.label_key is not None: + label = batch_data[self.label_key] + label = torch.argmax(label, dim=0) if label.shape[0] > 1 else label[0] batch_data[self.label_key] = label.to('cpu') d = summarizer(batch_data) diff --git a/try.py b/try.py new file mode 100644 index 0000000000..3f26c87cb4 --- /dev/null +++ b/try.py @@ -0,0 +1,6 @@ +try: + a = 1 + print(b) +except: + print('caught error') + print(a) From c39e9049984b570c26275a4c470289e9e136fc1e Mon Sep 17 00:00:00 2001 From: heyufan1995 Date: Mon, 3 Apr 2023 11:20:26 -0400 Subject: [PATCH 09/18] delete a tmp file Signed-off-by: heyufan1995 --- try.py | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 try.py diff --git a/try.py b/try.py deleted file mode 100644 index 3f26c87cb4..0000000000 --- a/try.py +++ /dev/null @@ -1,6 +0,0 @@ -try: - a = 1 - print(b) -except: - print('caught error') - print(a) From c536d44b589bbc99bcd1097d77f71bb0c0dad9fc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 5 Apr 2023 14:46:16 +0000 Subject: [PATCH 10/18] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- monai/apps/auto3dseg/data_analyzer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/monai/apps/auto3dseg/data_analyzer.py b/monai/apps/auto3dseg/data_analyzer.py index 02fef9c9e0..03700cdf69 100644 --- a/monai/apps/auto3dseg/data_analyzer.py +++ b/monai/apps/auto3dseg/data_analyzer.py @@ -223,7 +223,7 @@ def get_all_case_stats(self, key="training", transform_list=None): result_bycase[DataStatsKeys.BY_CASE].extend(_[DataStatsKeys.BY_CASE]) else: result_bycase = self._get_all_case_stats(0, 1, None, key, transform_list) - + summarizer = SegSummarizer( self.image_key, self.label_key, @@ -265,7 +265,7 @@ def _get_all_case_stats(self, rank: int=0, world_size: int=1, manager_list=None, Args: rank: GPU process rank, 0 for CPU process world_size: total number of GPUs, 1 for CPU process - manager_list: multiprocessing manager list object, if using multi-GPU. + manager_list: multiprocessing manager list object, if using multi-GPU. key: dataset key transform_list: option list of transforms before SegSummarizer """ From 5e58527dfd06dd8f47a1a2b74f9b5f42aad5efa4 Mon Sep 17 00:00:00 2001 From: monai-bot Date: Wed, 5 Apr 2023 14:58:56 +0000 Subject: [PATCH 11/18] [MONAI] code formatting Signed-off-by: monai-bot --- monai/apps/auto3dseg/data_analyzer.py | 33 +++++++++++++++------------ 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/monai/apps/auto3dseg/data_analyzer.py b/monai/apps/auto3dseg/data_analyzer.py index 03700cdf69..27f658872c 100644 --- a/monai/apps/auto3dseg/data_analyzer.py +++ b/monai/apps/auto3dseg/data_analyzer.py @@ -17,7 +17,7 @@ import numpy as np import torch -from torch.multiprocessing import Process, set_start_method, Manager +from torch.multiprocessing import Manager, Process, set_start_method from monai.apps.auto3dseg.transforms import EnsureSameShaped from monai.apps.utils import get_logger @@ -31,6 +31,7 @@ from monai.utils import StrEnum, min_version, optional_import from monai.utils.enums import DataStatsKeys, ImageStatsKeys + def strenum_representer(dumper, data): return dumper.represent_scalar("tag:yaml.org,2002:str", data.value) @@ -199,21 +200,21 @@ def get_all_case_stats(self, key="training", transform_list=None): """ result: dict[DataStatsKeys, Any] = {DataStatsKeys.SUMMARY: {}, DataStatsKeys.BY_CASE: []} result_bycase: dict[DataStatsKeys, Any] = {DataStatsKeys.SUMMARY: {}, DataStatsKeys.BY_CASE: []} - if self.device.type == 'cpu': + if self.device.type == "cpu": nprocs = 1 - logger.info(f'Using CPU for data analyzing!') + logger.info(f"Using CPU for data analyzing!") else: nprocs = torch.cuda.device_count() - logger.info(f'Found {nprocs} GPUs for data analyzing!') + logger.info(f"Found {nprocs} GPUs for data analyzing!") if nprocs > 1: - set_start_method('forkserver', force=True) + set_start_method("forkserver", force=True) with Manager() as manager: manager_list = manager.list() processes = [] for rank in range(nprocs): p = Process(target=self._get_all_case_stats, args=(rank, nprocs, manager_list, key, transform_list)) processes.append(p) - print('mp time', time.time() - start) + print("mp time", time.time() - start) for p in processes: p.start() for p in processes: @@ -259,7 +260,9 @@ def get_all_case_stats(self, key="training", transform_list=None): result[DataStatsKeys.BY_CASE] = result_bycase[DataStatsKeys.BY_CASE] return result - def _get_all_case_stats(self, rank: int=0, world_size: int=1, manager_list=None, key="training", transform_list=None): + def _get_all_case_stats( + self, rank: int = 0, world_size: int = 1, manager_list=None, key="training", transform_list=None + ): """ Get all case stats from a partitioned datalist. The function can only be called internally by get_all_case_stats. Args: @@ -311,11 +314,11 @@ def _get_all_case_stats(self, rank: int=0, world_size: int=1, manager_list=None, pin_memory=self.device.type == "cuda", ) result_bycase: dict[DataStatsKeys, Any] = {DataStatsKeys.SUMMARY: {}, DataStatsKeys.BY_CASE: []} - device = self.device if self.device.type == 'cpu' else torch.device(f'cuda',rank) + device = self.device if self.device.type == "cpu" else torch.device(f"cuda", rank) if not has_tqdm: warnings.warn("tqdm is not installed. not displaying the caching progress.") - for batch_data in tqdm(dataloader) if (has_tqdm and rank==0) else dataloader: + for batch_data in tqdm(dataloader) if (has_tqdm and rank == 0) else dataloader: batch_data = batch_data[0] try: batch_data[self.image_key] = batch_data[self.image_key].to(device) @@ -325,15 +328,17 @@ def _get_all_case_stats(self, rank: int=0, world_size: int=1, manager_list=None, batch_data[self.label_key] = label.to(device) d = summarizer(batch_data) - except: + except BaseException: logger.info(f"Unable to process data {batch_data['image_meta_dict']['filename_or_obj']} on {device}.") - if self.device.type == 'cuda': - logger.info(f"DataAnalyzer `device` was set to use GPU but the execution hit an exception. Falling back to use `cpu`.") - batch_data[self.image_key] = batch_data[self.image_key].to('cpu') + if self.device.type == "cuda": + logger.info( + f"DataAnalyzer `device` was set to use GPU but the execution hit an exception. Falling back to use `cpu`." + ) + batch_data[self.image_key] = batch_data[self.image_key].to("cpu") if self.label_key is not None: label = batch_data[self.label_key] label = torch.argmax(label, dim=0) if label.shape[0] > 1 else label[0] - batch_data[self.label_key] = label.to('cpu') + batch_data[self.label_key] = label.to("cpu") d = summarizer(batch_data) stats_by_cases = { From 7a0b73b7a95713c8d7b1b3a4a9d7c0b4dbe37d2e Mon Sep 17 00:00:00 2001 From: Wenqi Li <831580+wyli@users.noreply.github.com> Date: Wed, 5 Apr 2023 19:27:37 +0100 Subject: [PATCH 12/18] Update data_analyzer.py --- monai/apps/auto3dseg/data_analyzer.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/monai/apps/auto3dseg/data_analyzer.py b/monai/apps/auto3dseg/data_analyzer.py index 27f658872c..0a437e7ca2 100644 --- a/monai/apps/auto3dseg/data_analyzer.py +++ b/monai/apps/auto3dseg/data_analyzer.py @@ -202,7 +202,7 @@ def get_all_case_stats(self, key="training", transform_list=None): result_bycase: dict[DataStatsKeys, Any] = {DataStatsKeys.SUMMARY: {}, DataStatsKeys.BY_CASE: []} if self.device.type == "cpu": nprocs = 1 - logger.info(f"Using CPU for data analyzing!") + logger.info("Using CPU for data analyzing!") else: nprocs = torch.cuda.device_count() logger.info(f"Found {nprocs} GPUs for data analyzing!") @@ -214,7 +214,6 @@ def get_all_case_stats(self, key="training", transform_list=None): for rank in range(nprocs): p = Process(target=self._get_all_case_stats, args=(rank, nprocs, manager_list, key, transform_list)) processes.append(p) - print("mp time", time.time() - start) for p in processes: p.start() for p in processes: @@ -252,7 +251,6 @@ def get_all_case_stats(self, key="training", transform_list=None): sort_keys=False, ) # release memory - d = None if self.device.type == "cuda": # release unreferenced tensors to mitigate OOM # limitation: https://github.com/pytorch/pytorch/issues/12873#issuecomment-482916237 @@ -331,9 +329,7 @@ def _get_all_case_stats( except BaseException: logger.info(f"Unable to process data {batch_data['image_meta_dict']['filename_or_obj']} on {device}.") if self.device.type == "cuda": - logger.info( - f"DataAnalyzer `device` was set to use GPU but the execution hit an exception. Falling back to use `cpu`." - ) + logger.info("DataAnalyzer `device` set to GPU execution hit an exception. Falling back to `cpu`.") batch_data[self.image_key] = batch_data[self.image_key].to("cpu") if self.label_key is not None: label = batch_data[self.label_key] From 8b6c6be222c692f87e8f7c3800e0442c42e8c24b Mon Sep 17 00:00:00 2001 From: Wenqi Li <831580+wyli@users.noreply.github.com> Date: Wed, 5 Apr 2023 19:45:07 +0100 Subject: [PATCH 13/18] Update data_analyzer.py --- monai/apps/auto3dseg/data_analyzer.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/monai/apps/auto3dseg/data_analyzer.py b/monai/apps/auto3dseg/data_analyzer.py index 0a437e7ca2..053f22652a 100644 --- a/monai/apps/auto3dseg/data_analyzer.py +++ b/monai/apps/auto3dseg/data_analyzer.py @@ -259,7 +259,11 @@ def get_all_case_stats(self, key="training", transform_list=None): return result def _get_all_case_stats( - self, rank: int = 0, world_size: int = 1, manager_list=None, key="training", transform_list=None + self, rank: int = 0, + world_size: int = 1, + manager_list: list | None = None, + key="training", + transform_list: list | None = None, ): """ Get all case stats from a partitioned datalist. The function can only be called internally by get_all_case_stats. @@ -312,7 +316,7 @@ def _get_all_case_stats( pin_memory=self.device.type == "cuda", ) result_bycase: dict[DataStatsKeys, Any] = {DataStatsKeys.SUMMARY: {}, DataStatsKeys.BY_CASE: []} - device = self.device if self.device.type == "cpu" else torch.device(f"cuda", rank) + device = self.device if self.device.type == "cpu" else torch.device("cuda", rank) if not has_tqdm: warnings.warn("tqdm is not installed. not displaying the caching progress.") From 64148e960cabd96925c5e19bf81f22b1c889d8d9 Mon Sep 17 00:00:00 2001 From: monai-bot Date: Wed, 5 Apr 2023 19:27:14 +0000 Subject: [PATCH 14/18] [MONAI] code formatting Signed-off-by: monai-bot --- monai/apps/auto3dseg/data_analyzer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/monai/apps/auto3dseg/data_analyzer.py b/monai/apps/auto3dseg/data_analyzer.py index 053f22652a..340c4cce0e 100644 --- a/monai/apps/auto3dseg/data_analyzer.py +++ b/monai/apps/auto3dseg/data_analyzer.py @@ -259,7 +259,8 @@ def get_all_case_stats(self, key="training", transform_list=None): return result def _get_all_case_stats( - self, rank: int = 0, + self, + rank: int = 0, world_size: int = 1, manager_list: list | None = None, key="training", From 41645e114186e4bd8c1f93372f491c69d28d4569 Mon Sep 17 00:00:00 2001 From: Wenqi Li <831580+wyli@users.noreply.github.com> Date: Wed, 5 Apr 2023 21:09:39 +0100 Subject: [PATCH 15/18] Update data_analyzer.py --- monai/apps/auto3dseg/data_analyzer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/monai/apps/auto3dseg/data_analyzer.py b/monai/apps/auto3dseg/data_analyzer.py index 340c4cce0e..f544e7c653 100644 --- a/monai/apps/auto3dseg/data_analyzer.py +++ b/monai/apps/auto3dseg/data_analyzer.py @@ -263,9 +263,9 @@ def _get_all_case_stats( rank: int = 0, world_size: int = 1, manager_list: list | None = None, - key="training", + key: str = "training", transform_list: list | None = None, - ): + ) -> Any: """ Get all case stats from a partitioned datalist. The function can only be called internally by get_all_case_stats. Args: From d52e50fb33222cfa9e8bb976e645933066955ecc Mon Sep 17 00:00:00 2001 From: heyufan1995 Date: Tue, 11 Apr 2023 16:09:32 -0400 Subject: [PATCH 16/18] change image_only to True for dataanalyzer Signed-off-by: heyufan1995 --- monai/apps/auto3dseg/data_analyzer.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/monai/apps/auto3dseg/data_analyzer.py b/monai/apps/auto3dseg/data_analyzer.py index f544e7c653..a7f0454073 100644 --- a/monai/apps/auto3dseg/data_analyzer.py +++ b/monai/apps/auto3dseg/data_analyzer.py @@ -287,7 +287,7 @@ def _get_all_case_stats( keys = list(filter(None, [self.image_key, self.label_key])) if transform_list is None: transform_list = [ - LoadImaged(keys=keys, ensure_channel_first=True, image_only=False), + LoadImaged(keys=keys, ensure_channel_first=True, image_only=True), EnsureTyped(keys=keys, data_type="tensor", dtype=torch.float), Orientationd(keys=keys, axcodes="RAS"), ] @@ -329,10 +329,13 @@ def _get_all_case_stats( label = batch_data[self.label_key] label = torch.argmax(label, dim=0) if label.shape[0] > 1 else label[0] batch_data[self.label_key] = label.to(device) - d = summarizer(batch_data) except BaseException: - logger.info(f"Unable to process data {batch_data['image_meta_dict']['filename_or_obj']} on {device}.") + if 'image_meta_dict' in batch_data.keys(): + filename = batch_data['image_meta_dict']['filename_or_obj'] + else: + filename = batch_data[self.image_key].meta['filename_or_obj'] + logger.info(f"Unable to process data {filename} on {device}.") if self.device.type == "cuda": logger.info("DataAnalyzer `device` set to GPU execution hit an exception. Falling back to `cpu`.") batch_data[self.image_key] = batch_data[self.image_key].to("cpu") From 35dc2bdf1536d83117fbe21833239bc23167fbe6 Mon Sep 17 00:00:00 2001 From: monai-bot Date: Tue, 11 Apr 2023 20:23:19 +0000 Subject: [PATCH 17/18] [MONAI] code formatting Signed-off-by: monai-bot --- monai/apps/auto3dseg/data_analyzer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/monai/apps/auto3dseg/data_analyzer.py b/monai/apps/auto3dseg/data_analyzer.py index a7f0454073..ee4ee113cb 100644 --- a/monai/apps/auto3dseg/data_analyzer.py +++ b/monai/apps/auto3dseg/data_analyzer.py @@ -331,10 +331,10 @@ def _get_all_case_stats( batch_data[self.label_key] = label.to(device) d = summarizer(batch_data) except BaseException: - if 'image_meta_dict' in batch_data.keys(): - filename = batch_data['image_meta_dict']['filename_or_obj'] + if "image_meta_dict" in batch_data.keys(): + filename = batch_data["image_meta_dict"]["filename_or_obj"] else: - filename = batch_data[self.image_key].meta['filename_or_obj'] + filename = batch_data[self.image_key].meta["filename_or_obj"] logger.info(f"Unable to process data {filename} on {device}.") if self.device.type == "cuda": logger.info("DataAnalyzer `device` set to GPU execution hit an exception. Falling back to `cpu`.") From 52d0d401152762310648851fe7f85db356c98365 Mon Sep 17 00:00:00 2001 From: heyufan1995 Date: Tue, 11 Apr 2023 17:51:36 -0400 Subject: [PATCH 18/18] Add stats by case back to datastats.yaml Signed-off-by: heyufan1995 --- monai/apps/auto3dseg/data_analyzer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/monai/apps/auto3dseg/data_analyzer.py b/monai/apps/auto3dseg/data_analyzer.py index ee4ee113cb..950baed8f2 100644 --- a/monai/apps/auto3dseg/data_analyzer.py +++ b/monai/apps/auto3dseg/data_analyzer.py @@ -236,6 +236,7 @@ def get_all_case_stats(self, key="training", transform_list=None): n_cases = len(result_bycase[DataStatsKeys.BY_CASE]) result[DataStatsKeys.SUMMARY] = summarizer.summarize(cast(list, result_bycase[DataStatsKeys.BY_CASE])) result[DataStatsKeys.SUMMARY]["n_cases"] = n_cases + result[DataStatsKeys.BY_CASE] = [None] * n_cases result_bycase[DataStatsKeys.SUMMARY] = result[DataStatsKeys.SUMMARY] if not self._check_data_uniformity([ImageStatsKeys.SPACING], result): logger.info("Data spacing is not completely uniform. MONAI transforms may provide unexpected result")