diff --git a/model-zoo/models/mednist_ddpm/bundle/configs/common.yaml b/model-zoo/models/mednist_ddpm/bundle/configs/common.yaml new file mode 100644 index 00000000..e48b917b --- /dev/null +++ b/model-zoo/models/mednist_ddpm/bundle/configs/common.yaml @@ -0,0 +1,61 @@ +# This file defines common definitions used in training and inference, most importantly the network definition + +imports: +- $import os +- $import datetime +- $import torch +- $import scripts +- $import monai +- $import generative +- $import torch.distributed as dist + +image: $monai.utils.CommonKeys.IMAGE +label: $monai.utils.CommonKeys.LABEL +pred: $monai.utils.CommonKeys.PRED + +is_dist: '$dist.is_initialized()' +rank: '$dist.get_rank() if @is_dist else 0' +is_not_rank0: '$@rank > 0' +device: '$torch.device(f"cuda:{@rank}" if torch.cuda.is_available() else "cpu")' + +network_def: + _target_: generative.networks.nets.DiffusionModelUNet + spatial_dims: 2 + in_channels: 1 + out_channels: 1 + num_channels: [64, 128, 128] + attention_levels: [false, true, true] + num_res_blocks: 1 + num_head_channels: 128 + +network: $@network_def.to(@device) + +bundle_root: . 
+ckpt_path: $@bundle_root + '/models/model.pt' +use_amp: true +image_dim: 64 +image_size: [1, '@image_dim', '@image_dim'] +num_train_timesteps: 1000 + +base_transforms: +- _target_: LoadImaged + keys: '@image' + image_only: true +- _target_: EnsureChannelFirstd + keys: '@image' +- _target_: ScaleIntensityRanged + keys: '@image' + a_min: 0.0 + a_max: 255.0 + b_min: 0.0 + b_max: 1.0 + clip: true + +scheduler: + _target_: generative.networks.schedulers.DDPMScheduler + num_train_timesteps: '@num_train_timesteps' + +inferer: + _target_: generative.inferers.DiffusionInferer + scheduler: '@scheduler' + \ No newline at end of file diff --git a/model-zoo/models/mednist_ddpm/bundle/configs/infer.yaml b/model-zoo/models/mednist_ddpm/bundle/configs/infer.yaml new file mode 100644 index 00000000..f140c3b6 --- /dev/null +++ b/model-zoo/models/mednist_ddpm/bundle/configs/infer.yaml @@ -0,0 +1,38 @@ +# This defines an inference script for generating a random image to a Pytorch file + +batch_size: 1 +num_workers: 0 + +noise: $torch.rand(1,1,@image_dim,@image_dim) # create a random image every time this program is run + +out_file: "" # where to save the tensor to + +# using a lambda this defines a simple sampling function used below +sample: '$lambda x: @inferer.sample(input_noise=x, diffusion_model=@network, scheduler=@scheduler)' + +load_state: '$@network.load_state_dict(torch.load(@ckpt_path))' # command to load the saved model weights + +save_trans: + _target_: Compose + transforms: + - _target_: ScaleIntensity + minv: 0.0 + maxv: 255.0 + - _target_: ToTensor + track_meta: false + - _target_: SaveImage + output_ext: "jpg" + resample: false + output_dtype: '$torch.uint8' + separate_folder: false + output_postfix: '@out_file' + +# program to load the model weights, run `sample`, and store results to `out_file` +testing: +- '@load_state' +- '$torch.save(@sample(@noise.to(@device)), @out_file)' + +#alternative version which saves to a jpg file +testing_jpg: +- '@load_state' +- 
'$@save_trans(@sample(@noise.to(@device))[0])' \ No newline at end of file diff --git a/model-zoo/models/mednist_ddpm/bundle/configs/logging.conf b/model-zoo/models/mednist_ddpm/bundle/configs/logging.conf new file mode 100644 index 00000000..db85a0b9 --- /dev/null +++ b/model-zoo/models/mednist_ddpm/bundle/configs/logging.conf @@ -0,0 +1,21 @@ +[loggers] +keys=root + +[handlers] +keys=consoleHandler + +[formatters] +keys=fullFormatter + +[logger_root] +level=INFO +handlers=consoleHandler + +[handler_consoleHandler] +class=StreamHandler +level=INFO +formatter=fullFormatter +args=(sys.stdout,) + +[formatter_fullFormatter] +format=%(asctime)s - %(name)s - %(levelname)s - %(message)s \ No newline at end of file diff --git a/model-zoo/models/mednist_ddpm/bundle/configs/metadata.json b/model-zoo/models/mednist_ddpm/bundle/configs/metadata.json new file mode 100644 index 00000000..aef66f9f --- /dev/null +++ b/model-zoo/models/mednist_ddpm/bundle/configs/metadata.json @@ -0,0 +1,59 @@ +{ + "schema": "https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/meta_schema_20220729.json", + "version": "0.1.0", + "changelog": { + "0.1.0": "Initial version" + }, + "monai_version": "1.0.0", + "pytorch_version": "1.10.2", + "numpy_version": "1.21.2", + "optional_packages_version": {"generative":"0.1.0"}, + "task": "MedNIST Hand Generation", + "description": "", + "authors": "Walter Hugo Lopez Pinaya, Mark Graham, and Eric Kerfoot", + "copyright": "Copyright (c) KCL", + "references": [], + "intended_use": "This is suitable for research purposes only", + "image_classes": "Single channel magnitude data", + "data_source": "MedNIST", + "network_data_format": { + "inputs": { + "image": { + "type": "image", + "format": "magnitude", + "modality": "xray", + "num_channels": 1, + "spatial_shape": [ + 1, + 64, + 64 + ], + "dtype": "float32", + "value_range": [], + "is_patch_data": false, + "channel_def": { + "0": "image" + } + } + }, + "outputs": { + "pred": { + "type": 
"image", + "format": "magnitude", + "modality": "xray", + "num_channels": 1, + "spatial_shape": [ + 1, + 64, + 64 + ], + "dtype": "float32", + "value_range": [], + "is_patch_data": false, + "channel_def": { + "0": "image" + } + } + } + } +} diff --git a/model-zoo/models/mednist_ddpm/bundle/configs/train.yaml b/model-zoo/models/mednist_ddpm/bundle/configs/train.yaml new file mode 100644 index 00000000..739b3c1f --- /dev/null +++ b/model-zoo/models/mednist_ddpm/bundle/configs/train.yaml @@ -0,0 +1,157 @@ +# This defines the training script for the network + +# choose a new directory for every run +output_dir: $datetime.datetime.now().strftime('./results/output_%y%m%d_%H%M%S') +dataset_dir: ./data + +train_data: + _target_ : MedNISTDataset + root_dir: '@dataset_dir' + section: training + download: true + progress: false + seed: 0 + +val_data: + _target_ : MedNISTDataset + root_dir: '@dataset_dir' + section: validation + download: true + progress: false + seed: 0 + +train_datalist: '$[{"image": item["image"]} for item in @train_data.data if item["class_name"] == "Hand"]' +val_datalist: '$[{"image": item["image"]} for item in @val_data.data if item["class_name"] == "Hand"]' + +batch_size: 8 +num_substeps: 1 +num_workers: 4 +use_thread_workers: false + +lr: 0.000025 +rand_prob: 0.5 +num_epochs: 75 +val_interval: 5 +save_interval: 5 + +train_transforms: +- _target_: RandAffined + keys: '@image' + rotate_range: + - ['$-np.pi / 36', '$np.pi / 36'] + - ['$-np.pi / 36', '$np.pi / 36'] + translate_range: + - [-1, 1] + - [-1, 1] + scale_range: + - [-0.05, 0.05] + - [-0.05, 0.05] + spatial_size: [64, 64] + padding_mode: "zeros" + prob: '@rand_prob' + +train_ds: + _target_: Dataset + data: $@train_datalist + transform: + _target_: Compose + transforms: '$@base_transforms + @train_transforms' + +train_loader: + _target_: ThreadDataLoader + dataset: '@train_ds' + batch_size: '@batch_size' + repeats: '@num_substeps' + num_workers: '@num_workers' + use_thread_workers: 
'@use_thread_workers' + persistent_workers: '$@num_workers > 0' + shuffle: true + +val_ds: + _target_: Dataset + data: $@val_datalist + transform: + _target_: Compose + transforms: '@base_transforms' + +val_loader: + _target_: DataLoader + dataset: '@val_ds' + batch_size: '@batch_size' + num_workers: '@num_workers' + persistent_workers: '$@num_workers > 0' + shuffle: false + +lossfn: + _target_: torch.nn.MSELoss + +optimizer: + _target_: torch.optim.Adam + params: $@network.parameters() + lr: '@lr' + +prepare_batch: + _target_: scripts.DiffusionPrepareBatch + num_train_timesteps: '@num_train_timesteps' + +val_handlers: +- _target_: StatsHandler + name: train_log + output_transform: '$lambda x: None' + _disabled_: '@is_not_rank0' + +evaluator: + _target_: SupervisedEvaluator + device: '@device' + val_data_loader: '@val_loader' + network: '@network' + amp: '@use_amp' + inferer: '@inferer' + prepare_batch: '@prepare_batch' + key_val_metric: + val_mean_abs_error: + _target_: MeanAbsoluteError + output_transform: $monai.handlers.from_engine([@pred, @label]) + metric_cmp_fn: '$scripts.inv_metric_cmp_fn' + val_handlers: '$list(filter(bool, @val_handlers))' + +handlers: +- _target_: CheckpointLoader + _disabled_: $not os.path.exists(@ckpt_path) + load_path: '@ckpt_path' + load_dict: + model: '@network' +- _target_: ValidationHandler + validator: '@evaluator' + epoch_level: true + interval: '@val_interval' +- _target_: CheckpointSaver + save_dir: '@output_dir' + save_dict: + model: '@network' + save_interval: '@save_interval' + save_final: true + epoch_level: true + _disabled_: '@is_not_rank0' + +trainer: + _target_: SupervisedTrainer + max_epochs: '@num_epochs' + device: '@device' + train_data_loader: '@train_loader' + network: '@network' + loss_function: '@lossfn' + optimizer: '@optimizer' + inferer: '@inferer' + prepare_batch: '@prepare_batch' + key_train_metric: + train_acc: + _target_: MeanSquaredError + output_transform: $monai.handlers.from_engine([@pred, @label]) + 
metric_cmp_fn: '$scripts.inv_metric_cmp_fn' + train_handlers: '$list(filter(bool, @handlers))' + amp: '@use_amp' + +training: +- '$monai.utils.set_determinism(0)' +- '$@trainer.run()' diff --git a/model-zoo/models/mednist_ddpm/bundle/configs/train_multigpu.yaml b/model-zoo/models/mednist_ddpm/bundle/configs/train_multigpu.yaml new file mode 100644 index 00000000..2811612f --- /dev/null +++ b/model-zoo/models/mednist_ddpm/bundle/configs/train_multigpu.yaml @@ -0,0 +1,30 @@ +# This can be mixed in with the training script to enable multi-GPU training + +network: + _target_: torch.nn.parallel.DistributedDataParallel + module: $@network_def.to(@device) + device_ids: ['@device'] + find_unused_parameters: true + +tsampler: + _target_: DistributedSampler + dataset: '@train_ds' + even_divisible: true + shuffle: true +train_loader#sampler: '@tsampler' +train_loader#shuffle: false + +vsampler: + _target_: DistributedSampler + dataset: '@val_ds' + even_divisible: false + shuffle: false +val_loader#sampler: '@vsampler' + +training: +- $import torch.distributed as dist +- $dist.init_process_group(backend='nccl') +- $torch.cuda.set_device(@device) +- $monai.utils.set_determinism(seed=123), +- $@trainer.run() +- $dist.destroy_process_group() \ No newline at end of file diff --git a/model-zoo/models/mednist_ddpm/bundle/docs/2d_ddpm_bundle_tutorial.ipynb b/model-zoo/models/mednist_ddpm/bundle/docs/2d_ddpm_bundle_tutorial.ipynb new file mode 100644 index 00000000..4cd3f5d4 --- /dev/null +++ b/model-zoo/models/mednist_ddpm/bundle/docs/2d_ddpm_bundle_tutorial.ipynb @@ -0,0 +1,317 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c54f5831-58eb-4f9e-bb8a-2c2a6536a658", + "metadata": {}, + "source": [ + "# Denoising Diffusion Probabilistic Models with MedNIST Dataset Bundle \n", + "\n", + "This notebook discusses and uses the MONAI bundle it's included in for generating images from the MedNIST dataset using diffusion models. 
This is based off the 2d_ddpm_tutorial_ignite.ipynb notebook with a few changes.\n", + "\n", + "The bundle defines training and inference scripts whose use will be described here along with visualisations. The assumption with this notebook is that it's run within the bundle's `docs` directory and that the environment it runs in has `MONAI` and `GenerativeModels` installed. The command lines given are known to work in `bash` however may be problematic in Windows.\n", + "\n", + "First thing to do is import libraries and verify MONAI is present:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "6d32f8a4-2bfe-4cfb-9abd-033b0c6080e6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MONAI version: 1.1.0+45.g1a018a7b\n", + "Numpy version: 1.21.5\n", + "Pytorch version: 1.12.1\n", + "MONAI flags: HAS_EXT = False, USE_COMPILED = False, USE_META_DICT = False\n", + "MONAI rev id: 1a018a7b3034a86360d999a6bcc796bad330bba4\n", + "MONAI __file__: /home/localek10/workspace/monai/MONAI_mine/monai/__init__.py\n", + "\n", + "Optional dependencies:\n", + "Pytorch Ignite version: 0.4.8\n", + "ITK version: 5.2.1\n", + "Nibabel version: 4.0.2\n", + "scikit-image version: 0.19.2\n", + "Pillow version: 9.2.0\n", + "Tensorboard version: 2.9.0\n", + "gdown version: 4.5.1\n", + "TorchVision version: 0.13.1\n", + "tqdm version: 4.64.0\n", + "lmdb version: 1.2.1\n", + "psutil version: 5.9.0\n", + "pandas version: 1.4.3\n", + "einops version: 0.6.0\n", + "transformers version: 4.18.0\n", + "mlflow version: 1.28.0\n", + "pynrrd version: 0.4.2\n", + "\n", + "For details about installing the optional dependencies, please visit:\n", + " https://docs.monai.io/en/latest/installation.html#installing-the-recommended-dependencies\n", + "\n" + ] + } + ], + "source": [ + "import os\n", + "import shutil\n", + "import tempfile\n", + "from pathlib import Path\n", + "\n", + "import torch\n", + "\n", + "import matplotlib.pyplot as plt\n", + 
"import monai\n", + "from monai.bundle import ConfigParser\n", + "\n", + "# path to the bundle directory, this assumes you're running the notebook in its directory\n", + "bundle_root = str(Path(\".\").absolute().parent)\n", + "\n", + "monai.config.print_config()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d6fc6592-cb51-4527-97ee-add5d1cdbeb4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/tmp/tmpw33bol9_\n" + ] + } + ], + "source": [ + "directory = os.environ.get(\"MONAI_DATA_DIRECTORY\")\n", + "dataset_dir = tempfile.mkdtemp() if directory is None else directory\n", + "print(dataset_dir)" + ] + }, + { + "cell_type": "markdown", + "id": "678d2e51-dc2d-4ad9-a4c0-14a6f900398b", + "metadata": {}, + "source": [ + "A bundle can be run on the command line using the Fire library or by parsing the configuration manually then getting parsed content objects. The following is the command to train the network for the default number of epochs. It will define values in the config files which need to be set for a particular run, such as the dataset directory created above, and setting the PYTHONPATH variable. The configuration for this bundle is split into 4 yaml files, one having common definitions for training and inference, one to enable multi-GPU training, and one each for training and inference. Their combinations determine what your final configuration is, in this case the common and train files produce a training script. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d52a4ae9-0d6d-4bc4-a5b5-f84470711f2d", + "metadata": {}, + "outputs": [], + "source": [ + "# multiple config files need to be specified this way with '' quotes, variable used in command line must be in \"\" quotes\n", + "configs=f\"'{bundle_root}/configs/common.yaml', '{bundle_root}/configs/train.yaml'\"\n", + "\n", + "!PYTHONPATH={bundle_root} python -m monai.bundle run training \\\n", + " --meta_file {bundle_root}/configs/metadata.json \\\n", + " --config_file \"{configs}\" \\\n", + " --logging_file {bundle_root}/configs/logging.conf \\\n", + " --bundle_root {bundle_root} \\\n", + " --dataset_dir {dataset_dir}" + ] + }, + { + "cell_type": "markdown", + "id": "5030732c-deb5-448a-b575-385bda0fa308", + "metadata": {}, + "source": [ + "The test inference script can then be invoked as such to produce an output tensor saved to the given file with a randomly generated image. The `ckpt_path` value should point to the final checkpoint file created during the above training run, which will be in a subdirectory of `./results`. The training script's default behaviour is to create a new timestamped subdirectory in `./results` for every new run; this can be explicitly set by providing an `output_dir` value on the command line." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "40e6a3e9-3984-44b0-ba9a-5b8d58c7ea2d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2023-02-16 21:00:18,139 - INFO - --- input summary of monai.bundle.scripts.run ---\n", + "2023-02-16 21:00:18,139 - INFO - > runner_id: 'testing'\n", + "2023-02-16 21:00:18,139 - INFO - > meta_file: '/home/localek10/workspace/monai/GenerativeModels/model-zoo/models/mednist_ddpm/bundle/configs/metadata.json'\n", + "2023-02-16 21:00:18,139 - INFO - > config_file: ('/home/localek10/workspace/monai/GenerativeModels/model-zoo/models/mednist_ddpm/bundle/configs/common.yaml',\n", + " '/home/localek10/workspace/monai/GenerativeModels/model-zoo/models/mednist_ddpm/bundle/configs/infer.yaml')\n", + "2023-02-16 21:00:18,139 - INFO - > ckpt_path: './results/output_230215_174009/model_final_iteration=75000.pt'\n", + "2023-02-16 21:00:18,140 - INFO - > bundle_root: '/home/localek10/workspace/monai/GenerativeModels/model-zoo/models/mednist_ddpm/bundle'\n", + "2023-02-16 21:00:18,140 - INFO - > out_file: 'test.pt'\n", + "2023-02-16 21:00:18,140 - INFO - ---\n", + "\n", + "\n", + "100%|███████████████████████████████████████| 1000/1000 [00:10<00:00, 97.10it/s]\n", + "[[[], []], null]\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "configs=f\"'{bundle_root}/configs/common.yaml', '{bundle_root}/configs/infer.yaml'\"\n", + "\n", + "!PYTHONPATH={bundle_root} python -m monai.bundle run testing \\\n", + " --meta_file {bundle_root}/configs/metadata.json \\\n", + " --config_file \"{configs}\" \\\n", + " --ckpt_path ./results/output_230215_174009/model_final_iteration=75000.pt \\\n", + " --bundle_root {bundle_root} \\\n", + " --out_file test.pt\n", + "\n", + "test = torch.load(\"test.pt\", map_location=\"cpu\")\n", + "\n", + "plt.imshow(test[0, 0], vmin=0, vmax=1, cmap=\"gray\")" + ] + }, + { + "cell_type": "markdown", + "id": "f581c36e-4033-4005-8969-76205470588e", + "metadata": {}, + "source": [ + "The same can be done by creating the parser object, filling in its configuration, then resolving the Python objects from the constructed bundle data:" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "cf8438b3-4c7d-48c4-bb41-ed7def73753f", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 1000/1000 [00:09<00:00, 101.06it/s]\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import sys\n", + "\n", + "sys.path.append(bundle_root) # make sure we load the script files we need\n", + "\n", + "# configure the parser from the bundle's information\n", + "cp = ConfigParser()\n", + "cp.read_meta(f\"{bundle_root}/configs/metadata.json\")\n", + "cp.read_config([f\"{bundle_root}/configs/common.yaml\", f\"{bundle_root}/configs/infer.yaml\"])\n", + "cp[\"bundle_root\"] = bundle_root\n", + "cp[\"ckpt_path\"] = \"./results/output_230215_174009/model_final_iteration=75000.pt\"\n", + "\n", + "cp.get_parsed_content(\"load_state\") # load the saved state from the checkpoint just by resolving this value\n", + "\n", + "device = cp.get_parsed_content(\"device\") # device used by the bundle\n", + "sample = cp.get_parsed_content(\"sample\") # test sampling function\n", + "\n", + "image_dim = cp[\"image_dim\"] # get the stored dimension value, no need to resolve anything\n", + "\n", + "noise = torch.rand(1, 1, image_dim, image_dim).to(device) # or cp.get_parsed_content(\"noise\")\n", + "\n", + "test = sample(noise)\n", + "\n", + "plt.imshow(test[0, 0].cpu(), vmin=0, vmax=1, cmap=\"gray\")" + ] + }, + { + "cell_type": "markdown", + "id": "2feab4e5-2745-4d35-9eec-a2bb8340cf51", + "metadata": {}, + "source": [ + "Multi-GPU can be enabled by including the `train_multigpu.yaml` configuration file:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "173cda1c-ac90-410f-b34d-b6cbb0044c7a", + "metadata": {}, + "outputs": [], + "source": [ + "configs=f\"'{bundle_root}/configs/common.yaml', '{bundle_root}/configs/train.yaml', '{bundle_root}/configs/train_multigpu.yaml'\"\n", + "\n", + "!PYTHONPATH={bundle_root} torchrun --standalone --nnodes=1 --nproc_per_node=2 -m monai.bundle run training \\\n", + " --meta_file {bundle_root}/configs/metadata.json \\\n", + " --config_file \"{configs}\" \\\n", + " --logging_file {bundle_root}/configs/logging.conf \\\n", + " 
--bundle_root {bundle_root} \\\n", + " --dataset_dir {dataset_dir}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb719023-8250-43c4-ab10-911829332498", + "metadata": {}, + "outputs": [], + "source": [ + "if directory is None:\n", + " shutil.rmtree(dataset_dir)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [conda env:monai]", + "language": "python", + "name": "conda-env-monai-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/model-zoo/models/mednist_ddpm/bundle/docs/README.md b/model-zoo/models/mednist_ddpm/bundle/docs/README.md new file mode 100644 index 00000000..6483aff5 --- /dev/null +++ b/model-zoo/models/mednist_ddpm/bundle/docs/README.md @@ -0,0 +1,9 @@ + +# MedNIST DDPM Example Bundle + +This implements roughly equivalent code to the "Denoising Diffusion Probabilistic Models with MedNIST Dataset" example notebook. This includes scripts for training with single or multiple GPUs and a visualisation notebook. + +The files included here demonstrate how to use the bundle: + * [2d_ddpm_bundle_tutorial.ipynb](./2d_ddpm_bundle_tutorial.ipynb) - demonstrates command line and in-code invocation of the bundle's training and inference scripts + * [sub_train.sh](sub_train.sh) - SLURM submission script example for training + * [sub_train_multigpu.sh](sub_train_multigpu.sh) - SLURM submission script example for training with multiple GPUs diff --git a/model-zoo/models/mednist_ddpm/bundle/docs/sub_train.sh b/model-zoo/models/mednist_ddpm/bundle/docs/sub_train.sh new file mode 100755 index 00000000..237b16f5 --- /dev/null +++ b/model-zoo/models/mednist_ddpm/bundle/docs/sub_train.sh @@ -0,0 +1,34 @@ +#! 
/bin/bash +#SBATCH --nodes=1 +#SBATCH -J mednist_train +#SBATCH -c 4 +#SBATCH --gres=gpu:1 +#SBATCH --time=2:00:00 +#SBATCH -p small + +set -v + +# change this if run submitted from a different directory +export BUNDLE="$(pwd)/.." + +# have to set PYTHONPATH to find MONAI and GenerativeModels as well as the bundle's script directory +export PYTHONPATH="$HOME/MONAI:$HOME/GenerativeModels:$BUNDLE" + +# change this to load a checkpoint instead of started from scratch +CKPT=none + +CONFIG="'$BUNDLE/configs/common.yaml', '$BUNDLE/configs/train.yaml'" + +# change this to point to where MedNIST is located +DATASET="$(pwd)" + +# it's useful to include the configuration in the log file +cat "$BUNDLE/configs/common.yaml" +cat "$BUNDLE/configs/train.yaml" + +python -m monai.bundle run training \ + --meta_file "$BUNDLE/configs/metadata.json" \ + --config_file "$CONFIG" \ + --logging_file "$BUNDLE/configs/logging.conf" \ + --bundle_root "$BUNDLE" \ + --dataset_dir "$DATASET" diff --git a/model-zoo/models/mednist_ddpm/bundle/docs/sub_train_multigpu.sh b/model-zoo/models/mednist_ddpm/bundle/docs/sub_train_multigpu.sh new file mode 100644 index 00000000..7c424af0 --- /dev/null +++ b/model-zoo/models/mednist_ddpm/bundle/docs/sub_train_multigpu.sh @@ -0,0 +1,36 @@ +#! /bin/bash +#SBATCH --nodes=1 +#SBATCH -J mednist_train +#SBATCH -c 4 +#SBATCH --gres=gpu:2 +#SBATCH --time=2:00:00 +#SBATCH -p big + +set -v + +# change this if run submitted from a different directory +export BUNDLE="$(pwd)/.." 
from __future__ import annotations

from typing import Dict, Mapping, Optional, Union

import torch
from monai.engines import PrepareBatch, default_prepare_batch


class DiffusionPrepareBatch(PrepareBatch):
    """
    Callable for the `prepare_batch` parameter of engine classes, for diffusion training.

    For every batch this draws a noise tensor with `get_noise` and one random timestep per sample with
    `get_timesteps`. It returns the images as the network input, the noise as the training target, and a keyword
    argument dictionary holding the same noise under "noise" and the timesteps under "timesteps". This assumes the
    inferer used in conjunction with this class accepts "noise" and "timesteps" keyword arguments.

    If `condition_name` is provided, it must refer to a key in the input dictionary containing the condition
    field to be passed to the inferer. That tensor is moved to the target device and added to the keyword
    arguments under the key "conditioning".

    Args:
        num_train_timesteps: exclusive upper bound of the timestep values drawn by `get_timesteps`.
        condition_name: optional key of the conditioning tensor in the batch dictionary.
    """

    def __init__(self, num_train_timesteps: int, condition_name: str | None = None) -> None:
        self.condition_name = condition_name
        self.num_train_timesteps = num_train_timesteps

    def get_noise(self, images: torch.Tensor) -> torch.Tensor:
        """Returns the noise tensor for input tensor `images`, override this for different noise distributions."""
        return torch.randn_like(images)

    def get_timesteps(self, images: torch.Tensor) -> torch.Tensor:
        """Returns one integer timestep in `[0, num_train_timesteps)` per batch item, on the device of `images`."""
        return torch.randint(0, self.num_train_timesteps, (images.shape[0],), device=images.device).long()

    def __call__(
        self,
        batchdata: Dict[str, torch.Tensor],
        device: Union[str, torch.device] | None = None,
        non_blocking: bool = False,
        **kwargs,
    ) -> tuple[torch.Tensor, torch.Tensor, tuple, dict]:
        """
        Prepare one batch for a diffusion training or evaluation step.

        Args:
            batchdata: batch dictionary, passed to `default_prepare_batch` to extract the images.
            device: device the images, noise, timesteps and optional condition are moved to.
            non_blocking: forwarded to `default_prepare_batch` and to every `Tensor.to` call.
            kwargs: extra options forwarded unchanged to `default_prepare_batch` and to every `Tensor.to` call.

        Returns:
            A tuple `(images, noise, (), infer_kwargs)`: network input, target, empty positional arguments, and
            the keyword arguments ("noise", "timesteps" and optionally "conditioning").
        """
        images, _ = default_prepare_batch(batchdata, device, non_blocking, **kwargs)
        noise = self.get_noise(images).to(device, non_blocking=non_blocking, **kwargs)
        timesteps = self.get_timesteps(images).to(device, non_blocking=non_blocking, **kwargs)

        # Keep the inferer arguments in their own dictionary: `kwargs` holds the caller's `.to()` options and is
        # still needed below, so it must not be overwritten with the noise/timesteps entries.
        infer_kwargs = {"noise": noise, "timesteps": timesteps}

        if self.condition_name is not None and isinstance(batchdata, Mapping):
            # NOTE(review): the original docstring named this key "condition" while the code used "conditioning";
            # the code's key is kept here -- confirm it matches the keyword expected by the inferer in use.
            infer_kwargs["conditioning"] = batchdata[self.condition_name].to(
                device, non_blocking=non_blocking, **kwargs
            )

        # return input, target, arguments, and keyword arguments where noise is the target and also a keyword value
        return images, noise, (), infer_kwargs


def inv_metric_cmp_fn(current_metric: float, prev_best: float) -> bool:
    """
    This inverts comparison for those metrics which reduce like loss values, such that the lower one is better.

    Args:
        current_metric: metric value of current round computation.
        prev_best: the best metric value of previous rounds to compare with.

    Returns:
        True if `current_metric` is strictly lower than `prev_best`, False otherwise.
    """
    return current_metric < prev_best