From 98f46e854a36e2302428faf5b6686e19be4a0c76 Mon Sep 17 00:00:00 2001 From: Eric Kerfoot Date: Tue, 10 May 2022 13:15:35 +0100 Subject: [PATCH 1/6] Update to bundle specification Signed-off-by: Eric Kerfoot --- docs/source/mb_specification.rst | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/docs/source/mb_specification.rst b/docs/source/mb_specification.rst index 1d286052a5..a8b73fb31e 100644 --- a/docs/source/mb_specification.rst +++ b/docs/source/mb_specification.rst @@ -6,7 +6,7 @@ MONAI Bundle Specification Overview ======== -This is the specification for the MONAI Bundle (MB) format of portable described deep learning models. The objective of a MB is to define a packaged network or model which includes the critical information necessary to allow users and programs to understand how the model is used and for what purpose. A bundle includes the stored weights of a model as a pickled state dictionary and/or a Torchscript object. Additional JSON files are included to store metadata about the model, information for constructing training, inference, and post-processing transform sequences, plain-text description, legal information, and other data the model creator wishes to include. +This is the specification for the MONAI Bundle (MB) format of portable described deep learning models. The objective of a MB is to define a packaged network or model which includes the critical information necessary to allow users and programs to understand how the model is used and for what purpose. A bundle includes the stored weights of a single network as a pickled state dictionary plus optionally a Torchscript object and/or an ONNX object. Additional JSON files are included to store metadata about the model, information for constructing training, inference, and post-processing transform sequences, plain-text description, legal information, and other data the model creator wishes to include. 
This specification defines the directory structure a bundle must have and the necessary files it must contain. Additional files may be included and the directory packaged into a zip file or included as extra files directly in a Torchscript file. @@ -22,26 +22,33 @@ A MONAI Bundle is defined primarily as a directory with a set of specifically na ┃ ┗━ metadata.json ┣━ models ┃ ┣━ model.pt - ┃ ┗━ model.ts + ┃ ┣━ model.ts + ┃ ┗━ model.onnx ┗━ docs ┣━ README.md ┗━ license.txt -These files mostly are required to be present with the given names for the directory to define a valid bundle: +The following files are **required** to be present with the given filenames for the directory to define a valid bundle: * **metadata.json**: metadata information in JSON format relating to the type of model, definition of input and output tensors, versions of the model and used software, and other information described below. * **model.pt**: the state dictionary of a saved model, the information to instantiate the model must be found in the metadata file. + +The following files are optional but must have these names in the directory given above: + * **model.ts**: the Torchscript saved model if the model is compatible with being saved correctly in this format. +* **model.onnx**: the ONNX model if the model is compatible with being saved correctly in this format. * **README.md**: plain-language information on the model, how to use it, author information, etc. in Markdown format. * **license.txt**: software license attached to the model, can be left blank if no license needed. +Other files can be included in any of the above directories. For example, `configs` can contain further configuration JSON or YAML files to define scripts for training or inference, overriding configuration values, environment definitions such as network instantiations, and so forth. 
One common file to include is `inference.json` which is used to define a basic inference script which uses input files with the stored network to produce prediction output files. + Archive Format ============== -The bundle directory and its contents can be compressed into a zip file to constitute a single file package. When unzipped into a directory this file will reproduce the above directory structure, and should itself also be named after the model it contains. +The bundle directory and its contents can be compressed into a zip file to constitute a single file package. When unzipped into a directory this file will reproduce the above directory structure, and should itself also be named after the model it contains. For example, `ModelName.zip` would contain at least `ModelName/configs/metadata.json` and `ModelName/models/model.pt`, thus when unzipped would place files into the directory `ModelName` rather than into the current working directory. -The Torchscript file format is also just a zip file with a specific structure. When creating such an archive with `save_net_with_metadata` a MB-compliant Torchscript file can be created by including the contents of `metadata.json` as the `meta_values` argument of the function, and other files included as `more_extra_files` entries. These will be stored in a `extras` directory in the zip file and can be retrieved with `load_net_with_metadata` or with any other library/tool that can read zip data. In this format the `model.*` files are obviously not needed by `README.md` and `license.txt` can be added as more extra files. +The Torchscript file format is also just a zip file with a specific structure. When creating such an archive with `save_net_with_metadata` a MB-compliant Torchscript file can be created by including the contents of `metadata.json` as the `meta_values` argument of the function, and other files included as `more_extra_files` entries. 
These will be stored in a `extras` directory in the zip file and can be retrieved with `load_net_with_metadata` or with any other library/tool that can read zip data. In this format the `model.*` files are obviously not needed but `README.md` and `license.txt` as well as any others provided can be added as more extra files. metadata.json File ================== From 5cfe062974c71f1af963e8f0d432567ad4cc6922 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 10 May 2022 12:21:59 +0000 Subject: [PATCH 2/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- docs/source/mb_specification.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/mb_specification.rst b/docs/source/mb_specification.rst index a8b73fb31e..e5f954b1a2 100644 --- a/docs/source/mb_specification.rst +++ b/docs/source/mb_specification.rst @@ -41,7 +41,7 @@ The following files are optional but must have these names in the directory give * **README.md**: plain-language information on the model, how to use it, author information, etc. in Markdown format. * **license.txt**: software license attached to the model, can be left blank if no license needed. -Other files can be included in any of the above directories. For example, `configs` can contain further configuration JSON or YAML files to define scripts for training or inference, overriding configuration values, environment definitions such as network instantiations, and so forth. One common file to include is `inference.json` which is used to define a basic inference script which uses input files with the stored network to produce prediction output files. +Other files can be included in any of the above directories. 
For example, `configs` can contain further configuration JSON or YAML files to define scripts for training or inference, overriding configuration values, environment definitions such as network instantiations, and so forth. One common file to include is `inference.json` which is used to define a basic inference script which uses input files with the stored network to produce prediction output files. Archive Format ============== From 096c028736786f355159b3202f18b7d1d6ca054a Mon Sep 17 00:00:00 2001 From: Eric Kerfoot Date: Wed, 11 May 2022 12:05:47 +0100 Subject: [PATCH 3/6] Adding description in spec discussing the saved Torchscript object's file storage behaviour, and tweaking ckpt_export to add .json extension Signed-off-by: Eric Kerfoot --- docs/source/mb_specification.rst | 2 ++ monai/bundle/scripts.py | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/source/mb_specification.rst b/docs/source/mb_specification.rst index e5f954b1a2..7ac161aa32 100644 --- a/docs/source/mb_specification.rst +++ b/docs/source/mb_specification.rst @@ -50,6 +50,8 @@ The bundle directory and its contents can be compressed into a zip file to const The Torchscript file format is also just a zip file with a specific structure. When creating such an archive with `save_net_with_metadata` a MB-compliant Torchscript file can be created by including the contents of `metadata.json` as the `meta_values` argument of the function, and other files included as `more_extra_files` entries. These will be stored in a `extras` directory in the zip file and can be retrieved with `load_net_with_metadata` or with any other library/tool that can read zip data. In this format the `model.*` files are obviously not needed but `README.md` and `license.txt` as well as any others provided can be added as more extra files. +The `bundle` submodule of MONAI contains a number of command line programs. 
To produce a Torchscript bundle, use `ckpt_export` with a set of specified components such as the saved weights file and metadata file. Config files can be provided as JSON or YAML dictionaries defining Python constructs used by the `ConfigParser`; however, regardless of format, the produced bundle Torchscript object will store the files as JSON. + metadata.json File ================== diff --git a/monai/bundle/scripts.py b/monai/bundle/scripts.py index e5b306a90d..3c838d55a0 100644 --- a/monai/bundle/scripts.py +++ b/monai/bundle/scripts.py @@ -600,10 +600,15 @@ def ckpt_export( filename = os.path.basename(i) # remove extension filename, _ = os.path.splitext(filename) + # because all files are stored as JSON their name parts without extension must be unique if filename in extra_files: - raise ValueError(f"filename '{filename}' is given multiple times in config file list.") + raise ValueError(f"Filename part '{filename}' is given multiple times in config file list.") + # the file may be JSON or YAML but will get loaded and dumped out again as JSON extra_files[filename] = json.dumps(ConfigParser.load_config_file(i)).encode() + # add .json extension to all extra files which are always encoded as JSON + extra_files = {k + ".json": v for k, v in extra_files.items()} + save_net_with_metadata( jit_obj=net, filename_prefix_or_stream=filepath_, From f3637538ddf929fe85bc1c6de5aacb158544376c Mon Sep 17 00:00:00 2001 From: Eric Kerfoot Date: Wed, 11 May 2022 12:07:02 +0100 Subject: [PATCH 4/6] Annotating optional bundle files Signed-off-by: Eric Kerfoot --- docs/source/mb_specification.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/mb_specification.rst b/docs/source/mb_specification.rst index 7ac161aa32..a88096f274 100644 --- a/docs/source/mb_specification.rst +++ b/docs/source/mb_specification.rst @@ -22,11 +22,11 @@ A MONAI Bundle is defined primarily as a directory with a set of specifically na ┃ ┗━ metadata.json ┣━ models ┃ ┣━ 
model.pt - ┃ ┣━ model.ts - ┃ ┗━ model.onnx + ┃ ┣━ *model.ts + ┃ ┗━ *model.onnx ┗━ docs - ┣━ README.md - ┗━ license.txt + ┣━ *README.md + ┗━ *license.txt The following files are **required** to be present with the given filenames for the directory to define a valid bundle: From f4a6597218863df0eac5750743045577d7b30efa Mon Sep 17 00:00:00 2001 From: Eric Kerfoot Date: Wed, 11 May 2022 12:13:18 +0100 Subject: [PATCH 5/6] Adjusted ckpt_export test Signed-off-by: Eric Kerfoot --- tests/test_bundle_ckpt_export.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_bundle_ckpt_export.py b/tests/test_bundle_ckpt_export.py index 36aa7319f0..0051e8d929 100644 --- a/tests/test_bundle_ckpt_export.py +++ b/tests/test_bundle_ckpt_export.py @@ -52,10 +52,10 @@ def test_export(self, key_in_ckpt): subprocess.check_call(cmd) self.assertTrue(os.path.exists(ts_file)) - _, metadata, extra_files = load_net_with_metadata(ts_file, more_extra_files=["inference", "def_args"]) + _, metadata, extra_files = load_net_with_metadata(ts_file, more_extra_files=["inference.json", "def_args.json"]) self.assertTrue("schema" in metadata) - self.assertTrue("meta_file" in json.loads(extra_files["def_args"])) - self.assertTrue("network_def" in json.loads(extra_files["inference"])) + self.assertTrue("meta_file" in json.loads(extra_files["def_args.json"])) + self.assertTrue("network_def" in json.loads(extra_files["inference.json"])) if __name__ == "__main__": From 33723734a62aab363af199e7464b81b2e6ad60a8 Mon Sep 17 00:00:00 2001 From: Eric Kerfoot Date: Wed, 11 May 2022 12:19:27 +0100 Subject: [PATCH 6/6] Fix Signed-off-by: Eric Kerfoot --- tests/test_bundle_ckpt_export.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_bundle_ckpt_export.py b/tests/test_bundle_ckpt_export.py index 0051e8d929..a7cbff22f0 100644 --- a/tests/test_bundle_ckpt_export.py +++ b/tests/test_bundle_ckpt_export.py @@ -52,7 +52,9 @@ def test_export(self, key_in_ckpt): 
subprocess.check_call(cmd) self.assertTrue(os.path.exists(ts_file)) - _, metadata, extra_files = load_net_with_metadata(ts_file, more_extra_files=["inference.json", "def_args.json"]) + _, metadata, extra_files = load_net_with_metadata( + ts_file, more_extra_files=["inference.json", "def_args.json"] + ) self.assertTrue("schema" in metadata) self.assertTrue("meta_file" in json.loads(extra_files["def_args.json"])) self.assertTrue("network_def" in json.loads(extra_files["inference.json"]))