diff --git a/monai/apps/datasets.py b/monai/apps/datasets.py index deb76c9699..2b2f48f5d0 100644 --- a/monai/apps/datasets.py +++ b/monai/apps/datasets.py @@ -114,12 +114,9 @@ def _generate_data_list(self, dataset_dir: PathLike) -> List[Dict]: """ dataset_dir = Path(dataset_dir) - class_names = sorted(f"{x}" for x in dataset_dir.iterdir() if (dataset_dir / x).is_dir()) + class_names = sorted(f"{x.name}" for x in dataset_dir.iterdir() if x.is_dir()) # folder name as the class name self.num_class = len(class_names) - image_files = [ - [f"{dataset_dir.joinpath(class_names[i], x)}" for x in (dataset_dir / class_names[i]).iterdir()] - for i in range(self.num_class) - ] + image_files = [[f"{x}" for x in (dataset_dir / class_names[i]).iterdir()] for i in range(self.num_class)] num_each = [len(image_files[i]) for i in range(self.num_class)] image_files_list = [] image_class = [] @@ -145,7 +142,6 @@ def _generate_data_list(self, dataset_dir: PathLike) -> List[Dict]: raise ValueError( f'Unsupported section: {self.section}, available options are ["training", "validation", "test"].' ) - # the types of label and class name should be compatible with the pytorch dataloader return [ {"image": image_files_list[i], "label": image_class[i], "class_name": class_name[i]} diff --git a/monai/apps/utils.py b/monai/apps/utils.py index c2873959bb..f1619b9964 100644 --- a/monai/apps/utils.py +++ b/monai/apps/utils.py @@ -241,7 +241,7 @@ def extractall( cache_dir = Path(output_dir, _basename(filepath).split(".")[0]) else: cache_dir = Path(output_dir) - if cache_dir.exists() and len(list(cache_dir.iterdir())) > 0: + if cache_dir.exists() and next(cache_dir.iterdir(), None) is not None: logger.info(f"Non-empty folder exists in {cache_dir}, skipped extracting.") return filepath = Path(filepath) diff --git a/tests/test_decathlondataset.py b/tests/test_decathlondataset.py index 29ea3a3151..0756902385 100644 --- a/tests/test_decathlondataset.py +++ b/tests/test_decathlondataset.py @@ -60,6 +60,8 @@ def _test_dataset(dataset): root_dir=testing_dir, task="Task04_Hippocampus", transform=transform, section="validation", download=False ) _test_dataset(data) + self.assertTrue(data[0]["image_meta_dict"]["filename_or_obj"].endswith("hippocampus_163.nii.gz")) + self.assertTrue(data[0]["label_meta_dict"]["filename_or_obj"].endswith("hippocampus_163.nii.gz")) # test validation without transforms data = DecathlonDataset(root_dir=testing_dir, task="Task04_Hippocampus", section="validation", download=False) self.assertTupleEqual(data[0]["image"].shape, (36, 47, 44)) @@ -77,7 +79,7 @@ def _test_dataset(dataset): shutil.rmtree(os.path.join(testing_dir, "Task04_Hippocampus")) try: - data = DecathlonDataset( + DecathlonDataset( root_dir=testing_dir, task="Task04_Hippocampus", transform=transform, diff --git a/tests/test_mednistdataset.py b/tests/test_mednistdataset.py index f8d01902a5..a833ab75f3 100644 --- a/tests/test_mednistdataset.py +++ b/tests/test_mednistdataset.py @@ -55,16 +55,18 @@ def _test_dataset(dataset): # testing from data = MedNISTDataset(root_dir=Path(testing_dir), transform=transform, section="test", download=False) - data.get_num_classes() + self.assertEqual(data.get_num_classes(), 6) _test_dataset(data) data = MedNISTDataset(root_dir=testing_dir, section="test", download=False) self.assertTupleEqual(data[0]["image"].shape, (64, 64)) # test same dataset length with different random seed data = MedNISTDataset(root_dir=testing_dir, transform=transform, section="test", download=False, seed=42) _test_dataset(data) + self.assertEqual(data[0]["class_name"], "AbdomenCT") + self.assertEqual(data[0]["label"].cpu().item(), 0) shutil.rmtree(os.path.join(testing_dir, "MedNIST")) try: - data = MedNISTDataset(root_dir=testing_dir, transform=transform, section="test", download=False) + MedNISTDataset(root_dir=testing_dir, transform=transform, section="test", download=False) except RuntimeError as e: print(str(e)) self.assertTrue(str(e).startswith("Cannot find dataset directory"))