From 62174bd0127a3df60de0fcd073e5d6248247c8ad Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Mon, 4 Dec 2023 21:45:07 +0000 Subject: [PATCH 01/23] Add support for siglip models --- docs/source/exporters/onnx/overview.mdx | 1 + optimum/exporters/onnx/model_configs.py | 19 +++++++++++++++++++ optimum/exporters/tasks.py | 13 +++++++++++++ optimum/utils/normalized_config.py | 1 + tests/exporters/exporters_utils.py | 1 + transformers | 1 + 6 files changed, 36 insertions(+) create mode 160000 transformers diff --git a/docs/source/exporters/onnx/overview.mdx b/docs/source/exporters/onnx/overview.mdx index 0ea17b6afe..2d84e376c6 100644 --- a/docs/source/exporters/onnx/overview.mdx +++ b/docs/source/exporters/onnx/overview.mdx @@ -83,6 +83,7 @@ Supported architectures: - SEW - SEW-D - Speech2Text +- SigLIP - SpeechT5 - Splinter - SqueezeBert diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index f4d50ad58d..d7c019f9d3 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -806,6 +806,7 @@ class CLIPOnnxConfig(TextAndVisionOnnxConfig): @property def inputs(self) -> Dict[str, Dict[int, str]]: + print('1 get inputs') return { "input_ids": {0: "text_batch_size", 1: "sequence_length"}, "pixel_values": {0: "image_batch_size", 1: "num_channels", 2: "height", 3: "width"}, @@ -836,8 +837,10 @@ class CLIPTextWithProjectionOnnxConfig(TextEncoderOnnxConfig): @property def inputs(self) -> Dict[str, Dict[int, str]]: + print('2 get inputs') return { "input_ids": {0: "batch_size", 1: "sequence_length"}, + "attention_mask": {0: "text_batch_size", 1: "sequence_length"}, } @property @@ -876,6 +879,22 @@ def generate_dummy_inputs(self, framework: str = "pt", **kwargs): return dummy_inputs +class SiglipNormalizedConfig(CLIPNormalizedConfig): + pass + + +class SiglipOnnxConfig(CLIPOnnxConfig): + pass + + +class SiglipTextWithProjectionOnnxConfig(CLIPTextWithProjectionOnnxConfig): + pass + + +class SiglipTextOnnxConfig(CLIPTextOnnxConfig): + pass + + class UNetOnnxConfig(VisionOnnxConfig): ATOL_FOR_VALIDATION = 1e-3 # The ONNX export of a CLIPText architecture, an other Stable Diffusion component, needs the Trilu diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index 7545c72d6c..e7ef41d99b 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -864,6 +864,19 @@ class TasksManager: "audio-classification", onnx="SEWDOnnxConfig", ), + "siglip": supported_tasks_mapping( + "feature-extraction", + "zero-shot-image-classification", + onnx="SiglipOnnxConfig", + ), + "siglip-text-model": supported_tasks_mapping( + "feature-extraction", + onnx="SiglipTextOnnxConfig", + ), + "siglip-text-with-projection": supported_tasks_mapping( + "feature-extraction", + onnx="SiglipTextWithProjectionOnnxConfig", + ), "speech-to-text": supported_tasks_mapping( "feature-extraction", "feature-extraction-with-past", diff --git a/optimum/utils/normalized_config.py b/optimum/utils/normalized_config.py index 7a0af9a1a4..e98586e5bc 100644 --- a/optimum/utils/normalized_config.py +++ b/optimum/utils/normalized_config.py @@ -201,6 +201,7 @@ class NormalizedConfigManager: 'perceiver', 'roformer', 'segformer', + 'siglip', 'squeezebert', """ diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index 6e43b65e34..abd570a22a 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -129,6 +129,7 @@ "roformer": "hf-internal-testing/tiny-random-RoFormerModel", # "sam": "fxmarty/sam-vit-tiny-random", # TODO: re-enable once PyTorch 2.1 is released, see https://github.com/huggingface/optimum/pull/1301 "segformer": "hf-internal-testing/tiny-random-SegformerModel", + "siglip": "HuggingFaceM4/tiny-random-siglip", "splinter": "hf-internal-testing/tiny-random-SplinterModel", "squeezebert": "hf-internal-testing/tiny-random-SqueezeBertModel", "swin": "hf-internal-testing/tiny-random-SwinModel", diff --git a/transformers b/transformers new file mode 160000 index 0000000000..e2e6dc9a6d --- /dev/null +++ b/transformers @@ -0,0 +1 @@ +Subproject commit e2e6dc9a6dfb4665e41a084c45f4e5a34ea32a14 From 3860ac39ea7468cb97be2e76d46a0ad4ab53a3ec Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Mon, 4 Dec 2023 21:48:51 +0000 Subject: [PATCH 02/23] cleanup --- optimum/exporters/onnx/model_configs.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index d7c019f9d3..66596a1bf7 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -806,7 +806,6 @@ class CLIPOnnxConfig(TextAndVisionOnnxConfig): @property def inputs(self) -> Dict[str, Dict[int, str]]: - print('1 get inputs') return { "input_ids": {0: "text_batch_size", 1: "sequence_length"}, "pixel_values": {0: "image_batch_size", 1: "num_channels", 2: "height", 3: "width"}, @@ -837,10 +836,8 @@ class CLIPTextWithProjectionOnnxConfig(TextEncoderOnnxConfig): @property def inputs(self) -> Dict[str, Dict[int, str]]: - print('2 get inputs') return { "input_ids": {0: "batch_size", 1: "sequence_length"}, - "attention_mask": {0: "text_batch_size", 1: "sequence_length"}, } @property From 3e235380803ccc223e688b4e0ef7a1685b46554f Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Mon, 4 Dec 2023 21:50:19 +0000 Subject: [PATCH 03/23] remove submodule --- transformers | 1 - 1 file changed, 1 deletion(-) delete mode 160000 transformers diff --git a/transformers b/transformers deleted file mode 160000 index e2e6dc9a6d..0000000000 --- a/transformers +++ /dev/null @@ -1 +0,0 @@ -Subproject commit e2e6dc9a6dfb4665e41a084c45f4e5a34ea32a14 From be9c70790036f6d7111abbe1ed383e232c667689 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Sat, 9 Dec 2023 17:03:08 +0000 Subject: [PATCH 04/23] Add ONNX export for DinoV2 models --- docs/source/exporters/onnx/overview.mdx | 1 + optimum/exporters/onnx/model_configs.py | 4 ++++ optimum/exporters/tasks.py | 5 +++++ optimum/onnxruntime/modeling_ort.py | 2 +- optimum/utils/normalized_config.py | 1 + tests/exporters/exporters_utils.py | 1 + tests/onnxruntime/test_modeling.py | 1 + tests/onnxruntime/utils_onnxruntime_tests.py | 1 + 8 files changed, 15 insertions(+), 1 deletion(-) diff --git a/docs/source/exporters/onnx/overview.mdx b/docs/source/exporters/onnx/overview.mdx index 0a5da755a3..f6bd06cde1 100644 --- a/docs/source/exporters/onnx/overview.mdx +++ b/docs/source/exporters/onnx/overview.mdx @@ -38,6 +38,7 @@ Supported architectures: - Deberta-v2 - Deit - Detr +- DINOv2 - DistilBert - Donut-Swin - Electra diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index a58f42dca4..2f834f64fd 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -708,6 +708,10 @@ class ConvNextV2OnnxConfig(ViTOnnxConfig): pass +class Dinov2OnnxConfig(ViTOnnxConfig): + pass + + class MobileViTOnnxConfig(ViTOnnxConfig): ATOL_FOR_VALIDATION = 1e-4 diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index 4d3f9f98d0..91025b6d2d 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -482,6 +482,11 @@ class TasksManager: "image-segmentation", onnx="DetrOnnxConfig", ), + "dinov2": supported_tasks_mapping( + "feature-extraction", + "image-classification", + onnx="Dinov2OnnxConfig", + ), "distilbert": supported_tasks_mapping( "feature-extraction", "fill-mask", diff --git a/optimum/onnxruntime/modeling_ort.py b/optimum/onnxruntime/modeling_ort.py index eb9b540480..8d54c9a0f9 100644 --- a/optimum/onnxruntime/modeling_ort.py +++ b/optimum/onnxruntime/modeling_ort.py @@ -1534,7 +1534,7 @@ def forward( @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForImageClassification(ORTModel): """ - ONNX Model for image-classification tasks. This class officially supports beit, convnext, convnextv2, data2vec_vision, deit, levit, mobilenet_v1, mobilenet_v2, mobilevit, poolformer, resnet, segformer, swin, vit. + ONNX Model for image-classification tasks. This class officially supports beit, convnext, convnextv2, data2vec_vision, deit, dinov2, levit, mobilenet_v1, mobilenet_v2, mobilevit, poolformer, resnet, segformer, swin, vit. """ auto_model_class = AutoModelForImageClassification diff --git a/optimum/utils/normalized_config.py b/optimum/utils/normalized_config.py index 6fa4adcecf..38d63cf782 100644 --- a/optimum/utils/normalized_config.py +++ b/optimum/utils/normalized_config.py @@ -190,6 +190,7 @@ class NormalizedConfigManager: 'data2vec-text', 'data2vec-vision', 'detr', + 'dinov2', 'flaubert', 'groupvit', 'ibert', diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index 9af7806e7f..7ca67d8f05 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -62,6 +62,7 @@ "deberta": "hf-internal-testing/tiny-random-DebertaModel", "deberta-v2": "hf-internal-testing/tiny-random-DebertaV2Model", "deit": "hf-internal-testing/tiny-random-DeiTModel", + "dinov2": "hf-internal-testing/tiny-random-Dinov2Model", "donut": "fxmarty/tiny-doc-qa-vision-encoder-decoder", "donut-swin": "hf-internal-testing/tiny-random-DonutSwinModel", "detr": "hf-internal-testing/tiny-random-DetrModel", # hf-internal-testing/tiny-random-detr is larger diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index bb06e42157..81e1cb3149 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -2702,6 +2702,7 @@ class ORTModelForImageClassificationIntegrationTest(ORTModelTestMixin): "convnextv2", "data2vec_vision", "deit", + "dinov2", "levit", "mobilenet_v1", "mobilenet_v2", diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py index 8e579879ea..70be094fec 100644 --- a/tests/onnxruntime/utils_onnxruntime_tests.py +++ b/tests/onnxruntime/utils_onnxruntime_tests.py @@ -50,6 +50,7 @@ "deit": "hf-internal-testing/tiny-random-DeiTModel", "donut": "fxmarty/tiny-doc-qa-vision-encoder-decoder", "detr": "hf-internal-testing/tiny-random-detr", + "dinov2": "hf-internal-testing/tiny-random-Dinov2Model", "distilbert": "hf-internal-testing/tiny-random-DistilBertModel", "electra": "hf-internal-testing/tiny-random-ElectraModel", "encoder-decoder": { From c4d6bc27f1b4f18252fae2151e8f7a7c48668027 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Sat, 9 Dec 2023 19:42:40 +0000 Subject: [PATCH 05/23] Use height and width from preprocessor --- optimum/exporters/onnx/model_configs.py | 37 ++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 2f834f64fd..776a1f8506 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -708,8 +708,43 @@ class ConvNextV2OnnxConfig(ViTOnnxConfig): pass +class Dinov2DummyInputGenerator(DummyVisionInputGenerator): + def __init__( + self, + task: str, + normalized_config: NormalizedVisionConfig, + batch_size: int = DEFAULT_DUMMY_SHAPES["batch_size"], + num_channels: int = DEFAULT_DUMMY_SHAPES["num_channels"], + width: int = DEFAULT_DUMMY_SHAPES["width"], + height: int = DEFAULT_DUMMY_SHAPES["height"], + **kwargs, + ): + super().__init__( + task=task, + normalized_config=normalized_config, + batch_size=batch_size, + num_channels=num_channels, + width=width, + height=height, + **kwargs, + ) + + from transformers.onnx.utils import get_preprocessor + + preprocessor = get_preprocessor(normalized_config._name_or_path) + if preprocessor is not None and hasattr(preprocessor, "crop_size"): + self.height = preprocessor.crop_size.get("height", self.height) + self.width = preprocessor.crop_size.get("width", self.width) + + def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"): + input_ = super().generate( + input_name=input_name, framework=framework, int_dtype=int_dtype, float_dtype=float_dtype + ) + return input_ + + class Dinov2OnnxConfig(ViTOnnxConfig): - pass + DUMMY_INPUT_GENERATOR_CLASSES = (Dinov2DummyInputGenerator, ) class MobileViTOnnxConfig(ViTOnnxConfig): From cb8d362047d8e45c36561254ebff83bd633f2fb6 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Sat, 9 Dec 2023 19:48:11 +0000 Subject: [PATCH 06/23] formatting --- optimum/exporters/onnx/model_configs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 776a1f8506..a8fa6c1e01 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -744,7 +744,7 @@ def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int class Dinov2OnnxConfig(ViTOnnxConfig): - DUMMY_INPUT_GENERATOR_CLASSES = (Dinov2DummyInputGenerator, ) + DUMMY_INPUT_GENERATOR_CLASSES = (Dinov2DummyInputGenerator,) class MobileViTOnnxConfig(ViTOnnxConfig): From 94c332905cf9f0c3318cddce8e0ea1d3919be89e Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Sat, 23 Dec 2023 23:12:18 +0000 Subject: [PATCH 07/23] Remove attention mask from model input --- optimum/exporters/onnx/model_configs.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 66596a1bf7..761fceb6f2 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -881,7 +881,16 @@ class SiglipNormalizedConfig(CLIPNormalizedConfig): class SiglipOnnxConfig(CLIPOnnxConfig): - pass + NORMALIZED_CONFIG_CLASS = SiglipNormalizedConfig + DEFAULT_ONNX_OPSET = 13 + + @property + def inputs(self) -> Dict[str, Dict[int, str]]: + return { + "input_ids": {0: "text_batch_size", 1: "sequence_length"}, + "pixel_values": {0: "image_batch_size", 1: "num_channels", 2: "height", 3: "width"}, + # NOTE: No attention_mask + } class SiglipTextWithProjectionOnnxConfig(CLIPTextWithProjectionOnnxConfig): From 8d4b09e1aeb42c02edf01450f3b7be9db171bc9d Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Thu, 29 Aug 2024 00:52:03 +0000 Subject: [PATCH 08/23] Add ONNX export support for Hiera models --- docs/source/exporters/onnx/overview.mdx | 1 + optimum/exporters/onnx/model_configs.py | 4 ++++ optimum/exporters/tasks.py | 5 +++++ optimum/utils/normalized_config.py | 1 + tests/exporters/exporters_utils.py | 2 ++ tests/onnxruntime/utils_onnxruntime_tests.py | 1 + 6 files changed, 14 insertions(+) diff --git a/docs/source/exporters/onnx/overview.mdx b/docs/source/exporters/onnx/overview.mdx index 747e1396fb..11d0bc4a92 100644 --- a/docs/source/exporters/onnx/overview.mdx +++ b/docs/source/exporters/onnx/overview.mdx @@ -52,6 +52,7 @@ Supported architectures from [🤗 Transformers](https://huggingface.co/docs/tra - GPT-NeoX - OPT - GroupVit +- Hiera - Hubert - IBert - LayoutLM diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 3e11c7e614..81de33116f 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -772,6 +772,10 @@ class ConvNextV2OnnxConfig(ViTOnnxConfig): DEFAULT_ONNX_OPSET = 11 +class HieraOnnxConfig(ViTOnnxConfig): + DEFAULT_ONNX_OPSET = 11 + + class MobileViTOnnxConfig(ViTOnnxConfig): ATOL_FOR_VALIDATION = 1e-4 DEFAULT_ONNX_OPSET = 11 diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index f02f176923..01270f0b40 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -705,6 +705,11 @@ class TasksManager: "feature-extraction", onnx="GroupViTOnnxConfig", ), + "hiera": supported_tasks_mapping( + "feature-extraction", + "image-classification", + onnx="HieraOnnxConfig", + ), "hubert": supported_tasks_mapping( "feature-extraction", "automatic-speech-recognition", diff --git a/optimum/utils/normalized_config.py b/optimum/utils/normalized_config.py index 81207b7649..64dc0d7cc9 100644 --- a/optimum/utils/normalized_config.py +++ b/optimum/utils/normalized_config.py @@ -206,6 +206,7 @@ class NormalizedConfigManager: 'detr', 'flaubert', 'groupvit', + 'hiera', 'ibert', 'layoutlm', 'layoutlmv3', diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index a55c7a124d..2af51fc183 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -101,6 +101,7 @@ "gpt-neox": "hf-internal-testing/tiny-random-GPTNeoXForCausalLM", "gptj": "hf-internal-testing/tiny-random-GPTJModel", "groupvit": "hf-internal-testing/tiny-random-groupvit", + "hiera": "hf-internal-testing/tiny-random-HieraForImageClassification", "ibert": "hf-internal-testing/tiny-random-IBertModel", "imagegpt": "hf-internal-testing/tiny-random-ImageGPTModel", "levit": "hf-internal-testing/tiny-random-LevitModel", @@ -231,6 +232,7 @@ "gpt-neox": "EleutherAI/gpt-neox-20b", "gptj": "anton-l/gpt-j-tiny-random", # TODO "groupvit": "nvidia/groupvit-gcc-yfcc", + "hiera": "facebook/hiera-tiny-224-in1k-hf", "ibert": "kssteven/ibert-roberta-base", "imagegpt": "openai/imagegpt-small", "levit": "facebook/levit-128S", diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py index bb6935461d..3dc6be1909 100644 --- a/tests/onnxruntime/utils_onnxruntime_tests.py +++ b/tests/onnxruntime/utils_onnxruntime_tests.py @@ -105,6 +105,7 @@ "gpt_neox": "hf-internal-testing/tiny-random-GPTNeoXForCausalLM", "gptj": "hf-internal-testing/tiny-random-GPTJForCausalLM", "groupvit": "hf-internal-testing/tiny-random-groupvit", + "hiera": "hf-internal-testing/tiny-random-HieraForImageClassification", "hubert": "hf-internal-testing/tiny-random-HubertModel", "ibert": "hf-internal-testing/tiny-random-IBertModel", "levit": "hf-internal-testing/tiny-random-LevitModel", From b96bb6184523d63d62bd5a776264edf452841753 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Thu, 29 Aug 2024 13:56:07 +0000 Subject: [PATCH 09/23] Add ONNX export support for SwinV2 --- docs/source/exporters/onnx/overview.mdx | 1 + optimum/exporters/onnx/model_configs.py | 4 ++++ optimum/exporters/tasks.py | 6 ++++++ optimum/onnxruntime/modeling_ort.py | 2 +- optimum/onnxruntime/utils.py | 1 + tests/exporters/exporters_utils.py | 2 ++ tests/onnxruntime/utils_onnxruntime_tests.py | 1 + 7 files changed, 16 insertions(+), 1 deletion(-) diff --git a/docs/source/exporters/onnx/overview.mdx b/docs/source/exporters/onnx/overview.mdx index 11d0bc4a92..908d08b6f3 100644 --- a/docs/source/exporters/onnx/overview.mdx +++ b/docs/source/exporters/onnx/overview.mdx @@ -95,6 +95,7 @@ Supported architectures from [🤗 Transformers](https://huggingface.co/docs/tra - Splinter - SqueezeBert - Swin +- SwinV2 - T5 - Table Transformer - TROCR diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 81de33116f..7670f95b8e 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -817,6 +817,10 @@ class SwinOnnxConfig(ViTOnnxConfig): DEFAULT_ONNX_OPSET = 11 +class SwinV2OnnxConfig(SwinOnnxConfig): + pass + + class Swin2srOnnxConfig(SwinOnnxConfig): pass diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index 01270f0b40..b771eb731f 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -1054,6 +1054,12 @@ class TasksManager: "masked-im", onnx="SwinOnnxConfig", ), + "swinv2": supported_tasks_mapping( + "feature-extraction", + "image-classification", + "masked-im", + onnx="SwinV2OnnxConfig", + ), "swin2sr": supported_tasks_mapping( "feature-extraction", "image-to-image", diff --git a/optimum/onnxruntime/modeling_ort.py b/optimum/onnxruntime/modeling_ort.py index 254b771e33..7e53005ed2 100644 --- a/optimum/onnxruntime/modeling_ort.py +++ b/optimum/onnxruntime/modeling_ort.py @@ -1682,7 +1682,7 @@ def forward( @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForImageClassification(ORTModel): """ - ONNX Model for image-classification tasks. This class officially supports beit, convnext, convnextv2, data2vec_vision, deit, levit, mobilenet_v1, mobilenet_v2, mobilevit, poolformer, resnet, segformer, swin, vit. + ONNX Model for image-classification tasks. This class officially supports beit, convnext, convnextv2, data2vec_vision, deit, levit, mobilenet_v1, mobilenet_v2, mobilevit, poolformer, resnet, segformer, swin, swinv2, vit. """ auto_model_class = AutoModelForImageClassification diff --git a/optimum/onnxruntime/utils.py b/optimum/onnxruntime/utils.py index ad40af92b9..e4c16ae83a 100644 --- a/optimum/onnxruntime/utils.py +++ b/optimum/onnxruntime/utils.py @@ -175,6 +175,7 @@ def check_optimization_supported_model(cls, model_type: str, optimization_config "clip", "vit", "swin", + "swinv2", ] model_type = model_type.replace("_", "-") if (model_type not in cls._conf) or (cls._conf[model_type] not in supported_model_types_for_optimization): diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index 2af51fc183..eec4bb8dd2 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -148,6 +148,7 @@ "splinter": "hf-internal-testing/tiny-random-SplinterModel", "squeezebert": "hf-internal-testing/tiny-random-SqueezeBertModel", "swin": "hf-internal-testing/tiny-random-SwinModel", + "swinv2": "hf-internal-testing/tiny-random-Swinv2Model", "swin2sr": "hf-internal-testing/tiny-random-Swin2SRModel", "t5": "hf-internal-testing/tiny-random-t5", "table-transformer": "hf-internal-testing/tiny-random-TableTransformerModel", @@ -268,6 +269,7 @@ "splinter": "hf-internal-testing/tiny-random-SplinterModel", "squeezebert": "squeezebert/squeezebert-uncased", "swin": "microsoft/swin-tiny-patch4-window7-224", + "swinv2": "microsoft/swinv2-tiny-patch4-window16-256", "t5": "t5-small", "table-transformer": "microsoft/table-transformer-detection", "vit": "google/vit-base-patch16-224", diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py index 3dc6be1909..3b52194a12 100644 --- a/tests/onnxruntime/utils_onnxruntime_tests.py +++ b/tests/onnxruntime/utils_onnxruntime_tests.py @@ -144,6 +144,7 @@ "stable-diffusion": "hf-internal-testing/tiny-stable-diffusion-torch", "stable-diffusion-xl": "echarlaix/tiny-random-stable-diffusion-xl", "swin": "hf-internal-testing/tiny-random-SwinModel", + "swinv2": "hf-internal-testing/tiny-random-Swinv2Model", "swin-window": "yujiepan/tiny-random-swin-patch4-window7-224", "t5": "hf-internal-testing/tiny-random-t5", "table-transformer": "hf-internal-testing/tiny-random-TableTransformerModel", From fe140c6f106e1d490d1e9cc3a275130d62d5cae9 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Thu, 29 Aug 2024 16:23:49 +0000 Subject: [PATCH 10/23] Upgrade Siglip to opset=14 --- optimum/exporters/onnx/model_configs.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index b5db3feeff..4d2ced6fc1 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -1087,7 +1087,9 @@ class SiglipNormalizedConfig(CLIPNormalizedConfig): class SiglipOnnxConfig(CLIPOnnxConfig): NORMALIZED_CONFIG_CLASS = SiglipNormalizedConfig - DEFAULT_ONNX_OPSET = 13 + # torch.onnx.errors.UnsupportedOperatorError: Exporting the operator 'aten::scaled_dot_product_attention' to ONNX opset version 13 is not supported. + # Support for this operator was added in version 14, try exporting with this version. + DEFAULT_ONNX_OPSET = 14 @property def inputs(self) -> Dict[str, Dict[int, str]]: From 09ae91af4ac324cf1ccb6ea25413e287bda96b70 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Fri, 30 Aug 2024 12:18:16 +0000 Subject: [PATCH 11/23] Add VQA task --- optimum/exporters/tasks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index b02e6b392a..27cf9b6ef2 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -223,6 +223,7 @@ class TasksManager: "text2text-generation": "AutoModelForSeq2SeqLM", "text-classification": "AutoModelForSequenceClassification", "token-classification": "AutoModelForTokenClassification", + "visual-question-answering": "AutoModelForVisualQuestionAnswering", "zero-shot-image-classification": "AutoModelForZeroShotImageClassification", "zero-shot-object-detection": "AutoModelForZeroShotObjectDetection", } From 96afc91dcb7a4b157270f80bd2bd0e2bf023b14b Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Fri, 30 Aug 2024 14:35:25 +0000 Subject: [PATCH 12/23] Add ONNX export support for Maskformer --- docs/source/exporters/onnx/overview.mdx | 1 + optimum/exporters/onnx/model_configs.py | 16 ++++++++++++++++ optimum/exporters/tasks.py | 7 ++++++- tests/exporters/exporters_utils.py | 2 ++ 4 files changed, 25 insertions(+), 1 deletion(-) diff --git a/docs/source/exporters/onnx/overview.mdx b/docs/source/exporters/onnx/overview.mdx index 869d481840..164118ba9c 100644 --- a/docs/source/exporters/onnx/overview.mdx +++ b/docs/source/exporters/onnx/overview.mdx @@ -65,6 +65,7 @@ Supported architectures from [🤗 Transformers](https://huggingface.co/docs/tra - M2-M100 - Marian - MarkupLM +- Maskformer - MBart - Mistral - MobileBert diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 4d2ced6fc1..1c11d1e554 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -895,6 +895,22 @@ class MobileNetV2OnnxConfig(MobileNetV1OnnxConfig): pass +class MaskformerOnnxConfig(ViTOnnxConfig): + # torch.onnx.errors.UnsupportedOperatorError: Exporting the operator 'aten::einsum' to ONNX opset version 11 is not supported. + # Support for this operator was added in version 12, try exporting with this version. + DEFAULT_ONNX_OPSET = 12 + + @property + def outputs(self) -> Dict[str, Dict[int, str]]: + if self.task == "image-segmentation": + return { + "class_queries_logits": {0: "batch_size", 1: "num_queries"}, + "masks_queries_logits": {0: "batch_size", 1: "num_queries", 2: "height", 3: "width"}, + } + else: + return super().outputs + + class DonutSwinOnnxConfig(ViTOnnxConfig): DEFAULT_ONNX_OPSET = 11 diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index 27cf9b6ef2..192c2dbfb5 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -209,7 +209,7 @@ class TasksManager: "feature-extraction": "AutoModel", "fill-mask": "AutoModelForMaskedLM", "image-classification": "AutoModelForImageClassification", - "image-segmentation": ("AutoModelForImageSegmentation", "AutoModelForSemanticSegmentation"), + "image-segmentation": ("AutoModelForImageSegmentation", "AutoModelForSemanticSegmentation", "AutoModelForInstanceSegmentation", "AutoModelForUniversalSegmentation"), "image-to-image": "AutoModelForImageToImage", "image-to-text": "AutoModelForVision2Seq", "mask-generation": "AutoModel", @@ -797,6 +797,11 @@ class TasksManager: "question-answering", onnx="MarkupLMOnnxConfig", ), + "maskformer": supported_tasks_mapping( + "feature-extraction", + "image-segmentation", + onnx="MaskformerOnnxConfig", + ), "mbart": supported_tasks_mapping( "feature-extraction", "feature-extraction-with-past", diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index 5b7719a921..e96e756725 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -115,6 +115,7 @@ "m2m-100": "hf-internal-testing/tiny-random-m2m_100", "marian": "sshleifer/tiny-marian-en-de", # hf-internal-testing ones are broken "markuplm": "hf-internal-testing/tiny-random-MarkupLMModel", + "maskformer": "hf-internal-testing/tiny-random-MaskFormerForInstanceSegmentation", "mbart": "hf-internal-testing/tiny-random-mbart", "mistral": "echarlaix/tiny-random-mistral", "mobilebert": "hf-internal-testing/tiny-random-MobileBertModel", @@ -248,6 +249,7 @@ "m2m-100": "hf-internal-testing/tiny-random-m2m_100", # Not using facebook/m2m100_418M because it takes too much time for testing. "marian": "Helsinki-NLP/opus-mt-en-de", "markuplm": "hf-internal-testing/tiny-random-MarkupLMModel", + "maskformer": "facebook/maskformer-swin-tiny-coco", "mbart": "sshleifer/tiny-mbart", "mobilebert": "google/mobilebert-uncased", # "mobilenet_v1": "google/mobilenet_v1_0.75_192", From 844aa66e0694a97a06fb9a25bfa582ea3f2de481 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Fri, 30 Aug 2024 16:49:19 +0000 Subject: [PATCH 13/23] Add ONNX export support for PVT --- docs/source/exporters/onnx/overview.mdx | 1 + optimum/exporters/onnx/model_configs.py | 4 ++++ optimum/exporters/tasks.py | 5 +++++ tests/exporters/exporters_utils.py | 2 ++ tests/onnxruntime/utils_onnxruntime_tests.py | 1 + 5 files changed, 13 insertions(+) diff --git a/docs/source/exporters/onnx/overview.mdx b/docs/source/exporters/onnx/overview.mdx index 164118ba9c..cf83fbeaba 100644 --- a/docs/source/exporters/onnx/overview.mdx +++ b/docs/source/exporters/onnx/overview.mdx @@ -83,6 +83,7 @@ Supported architectures from [🤗 Transformers](https://huggingface.co/docs/tra - Phi3 - Pix2Struct - PoolFormer +- PVT - Qwen2(Qwen1.5) - RegNet - ResNet diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 1c11d1e554..47bfed1267 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -776,6 +776,10 @@ class HieraOnnxConfig(ViTOnnxConfig): DEFAULT_ONNX_OPSET = 11 +class PvtOnnxConfig(ViTOnnxConfig): + DEFAULT_ONNX_OPSET = 11 + + class Dinov2DummyInputGenerator(DummyVisionInputGenerator): def __init__( self, diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index 192c2dbfb5..2231d66de0 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -974,6 +974,11 @@ class TasksManager: "image-classification", onnx="PoolFormerOnnxConfig", ), + "pvt": supported_tasks_mapping( + "feature-extraction", + "image-classification", + onnx="PvtOnnxConfig", + ), "regnet": supported_tasks_mapping( "feature-extraction", "image-classification", diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index e96e756725..5d388715e0 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -140,6 +140,7 @@ "pix2struct": "fxmarty/pix2struct-tiny-random", # "rembert": "google/rembert", "poolformer": "hf-internal-testing/tiny-random-PoolFormerModel", + "pvt": "hf-internal-testing/tiny-random-PvtForImageClassification", "qwen2": "fxmarty/tiny-dummy-qwen2", "regnet": "hf-internal-testing/tiny-random-RegNetModel", "resnet": "hf-internal-testing/tiny-random-resnet", @@ -264,6 +265,7 @@ "perceiver": "hf-internal-testing/tiny-random-PerceiverModel", # Not using deepmind/language-perceiver because it takes too much time for testing. # "rembert": "google/rembert", "poolformer": "hf-internal-testing/tiny-random-PoolFormerModel", + "pvt": "hf-internal-testing/tiny-random-PvtForImageClassification", "regnet": "facebook/regnet-y-040", "resnet": "microsoft/resnet-50", "roberta": "roberta-base", diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py index 0e8d42fbcc..947db0d8cd 100644 --- a/tests/onnxruntime/utils_onnxruntime_tests.py +++ b/tests/onnxruntime/utils_onnxruntime_tests.py @@ -133,6 +133,7 @@ "phi3": "Xenova/tiny-random-Phi3ForCausalLM", "pix2struct": "fxmarty/pix2struct-tiny-random", "poolformer": "hf-internal-testing/tiny-random-PoolFormerModel", + "pvt": "hf-internal-testing/tiny-random-PvtForImageClassification", "qwen2": "fxmarty/tiny-dummy-qwen2", "resnet": "hf-internal-testing/tiny-random-resnet", "roberta": "hf-internal-testing/tiny-random-RobertaModel", From de07c7aaf512ca45d6635f089dd8763dee77b870 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Fri, 30 Aug 2024 17:26:19 +0000 Subject: [PATCH 14/23] Add ONNX export support for ViTMAE and ViTMSN --- docs/source/exporters/onnx/overview.mdx | 2 ++ optimum/exporters/onnx/model_configs.py | 12 ++++++++++++ optimum/exporters/tasks.py | 16 +++++++++++++++- tests/exporters/exporters_utils.py | 2 ++ 4 files changed, 31 insertions(+), 1 deletion(-) diff --git a/docs/source/exporters/onnx/overview.mdx b/docs/source/exporters/onnx/overview.mdx index cf83fbeaba..195340bbd0 100644 --- a/docs/source/exporters/onnx/overview.mdx +++ b/docs/source/exporters/onnx/overview.mdx @@ -107,6 +107,8 @@ Supported architectures from [🤗 Transformers](https://huggingface.co/docs/tra - UniSpeech SAT - Vision Encoder Decoder - Vit +- VitMAE +- VitMSN - Wav2Vec2 - Wav2Vec2 Conformer - WavLM diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 47bfed1267..aced5b7d7f 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -780,6 +780,18 @@ class PvtOnnxConfig(ViTOnnxConfig): DEFAULT_ONNX_OPSET = 11 +class VitMAEOnnxConfig(ViTOnnxConfig): + # torch.onnx.errors.UnsupportedOperatorError: Exporting the operator 'aten::scaled_dot_product_attention' to ONNX opset version 11 is not supported. + # Support for this operator was added in version 14, try exporting with this version. + DEFAULT_ONNX_OPSET = 14 + + +class VitMSNOnnxConfig(ViTOnnxConfig): + # torch.onnx.errors.UnsupportedOperatorError: Exporting the operator 'aten::scaled_dot_product_attention' to ONNX opset version 11 is not supported. + # Support for this operator was added in version 14, try exporting with this version. + DEFAULT_ONNX_OPSET = 14 + + class Dinov2DummyInputGenerator(DummyVisionInputGenerator): def __init__( self, diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index 2231d66de0..ab6b9bd1d7 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -1135,7 +1135,21 @@ class TasksManager: onnx="VisionEncoderDecoderOnnxConfig", ), "vit": supported_tasks_mapping( - "feature-extraction", "image-classification", "masked-im", onnx="ViTOnnxConfig" + "feature-extraction", + "image-classification", + "masked-im", + onnx="ViTOnnxConfig", + ), + "vit-mae": supported_tasks_mapping( + "feature-extraction", + "masked-im", + onnx="VitMAEOnnxConfig", + ), + "vit-msn": supported_tasks_mapping( + "feature-extraction", + "image-classification", + "masked-im", + onnx="VitMSNOnnxConfig", ), "vits": supported_tasks_mapping( "text-to-audio", diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index 5d388715e0..6983a4e029 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -157,6 +157,7 @@ "t5": "hf-internal-testing/tiny-random-t5", "table-transformer": "hf-internal-testing/tiny-random-TableTransformerModel", "vit": "hf-internal-testing/tiny-random-vit", + "vit-msn": "hf-internal-testing/tiny-random-ViTMSNForImageClassification", "vits": "echarlaix/tiny-random-vits", "yolos": "hf-internal-testing/tiny-random-YolosModel", "whisper": "openai/whisper-tiny.en", # hf-internal-testing ones are broken @@ -279,6 +280,7 @@ "t5": "t5-small", "table-transformer": "microsoft/table-transformer-detection", "vit": "google/vit-base-patch16-224", + "vit-msn": "facebook/vit-msn-small", "yolos": "hustvl/yolos-tiny", "whisper": "openai/whisper-tiny.en", "hubert": "facebook/hubert-base-ls960", From 398d07a918162556129278476b344bb07c4e876e Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Thu, 14 Nov 2024 19:29:09 +0000 Subject: [PATCH 15/23] Add siglip unit tests --- optimum/exporters/onnx/model_configs.py | 6 ++++++ optimum/exporters/tasks.py | 6 ++++++ tests/exporters/exporters_utils.py | 4 +++- tests/onnxruntime/utils_onnxruntime_tests.py | 1 + 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index aced5b7d7f..1208354db4 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -1140,6 +1140,12 @@ class SiglipTextOnnxConfig(CLIPTextOnnxConfig): pass +class SiglipVisionModelOnnxConfig(CLIPVisionModelOnnxConfig): + # torch.onnx.errors.UnsupportedOperatorError: Exporting the operator 'aten::scaled_dot_product_attention' to ONNX opset version 11 is not supported. + # Support for this operator was added in version 14, try exporting with this version. + DEFAULT_ONNX_OPSET = 14 + + class UNetOnnxConfig(VisionOnnxConfig): ATOL_FOR_VALIDATION = 1e-3 # The ONNX export of a CLIPText architecture, an other Stable Diffusion component, needs the Trilu diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index ab6b9bd1d7..af8f272aec 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -1051,6 +1051,10 @@ class TasksManager: "feature-extraction", onnx="SiglipTextWithProjectionOnnxConfig", ), + "siglip-vision-model": supported_tasks_mapping( + "feature-extraction", + onnx="SiglipVisionModelOnnxConfig", + ), "speech-to-text": supported_tasks_mapping( "feature-extraction", "feature-extraction-with-past", @@ -1229,6 +1233,8 @@ class TasksManager: "vae-decoder", "clip-text-model", "clip-text-with-projection", + "siglip-text-model", + "siglip-text-with-projection", "trocr", # supported through the vision-encoder-decoder model type } _SUPPORTED_CLI_MODEL_TYPE = ( diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index 6983a4e029..30e3110c6b 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -148,7 +148,8 @@ "roformer": "hf-internal-testing/tiny-random-RoFormerModel", "sam": "fxmarty/sam-vit-tiny-random", "segformer": "hf-internal-testing/tiny-random-SegformerModel", - "siglip": "HuggingFaceM4/tiny-random-siglip", + "siglip": "hf-internal-testing/tiny-random-SiglipModel", + "siglip-vision-model": "hf-internal-testing/tiny-random-SiglipVisionModel", "splinter": "hf-internal-testing/tiny-random-SplinterModel", "squeezebert": "hf-internal-testing/tiny-random-SqueezeBertModel", "swin": "hf-internal-testing/tiny-random-SwinModel", @@ -273,6 +274,7 @@ "roformer": "junnyu/roformer_chinese_base", "sam": "facebook/sam-vit-base", "segformer": "nvidia/segformer-b0-finetuned-ade-512-512", + "siglip": "google/siglip-base-patch16-224", "splinter": "hf-internal-testing/tiny-random-SplinterModel", "squeezebert": "squeezebert/squeezebert-uncased", "swin": "microsoft/swin-tiny-patch4-window7-224", diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py index 947db0d8cd..7b5b3ef62a 100644 --- a/tests/onnxruntime/utils_onnxruntime_tests.py +++ b/tests/onnxruntime/utils_onnxruntime_tests.py @@ -141,6 +141,7 @@ "segformer": "hf-internal-testing/tiny-random-SegformerModel", "sew": "hf-internal-testing/tiny-random-SEWModel", "sew_d": "asapp/sew-d-tiny-100k-ft-ls100h", + "siglip": "hf-internal-testing/tiny-random-SiglipModel", "squeezebert": "hf-internal-testing/tiny-random-SqueezeBertModel", "speech_to_text": "hf-internal-testing/tiny-random-Speech2TextModel", "stable-diffusion": "hf-internal-testing/tiny-stable-diffusion-torch", From 86706d19fe4e4c8871eea743c2299401ad7a5319 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Thu, 14 Nov 2024 19:34:19 +0000 Subject: [PATCH 16/23] Add vit-mae unit tests --- optimum/exporters/tasks.py | 1 + tests/exporters/exporters_utils.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index af8f272aec..de287f09e3 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -312,6 +312,7 @@ class TasksManager: "lcm": "text-to-image", "stable-diffusion": "text-to-image", "stable-diffusion-xl": "text-to-image", + "pretraining": "feature-extraction", } _CUSTOM_CLASSES = { diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index 30e3110c6b..13e1f9e14b 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -158,6 +158,7 @@ "t5": "hf-internal-testing/tiny-random-t5", "table-transformer": "hf-internal-testing/tiny-random-TableTransformerModel", "vit": "hf-internal-testing/tiny-random-vit", + "vit-mae": "hf-internal-testing/tiny-random-ViTMAEModel", "vit-msn": "hf-internal-testing/tiny-random-ViTMSNForImageClassification", "vits": "echarlaix/tiny-random-vits", "yolos": "hf-internal-testing/tiny-random-YolosModel", @@ -282,6 +283,7 @@ "t5": "t5-small", "table-transformer": "microsoft/table-transformer-detection", "vit": "google/vit-base-patch16-224", + "vit-mae": "facebook/vit-mae-base", "vit-msn": "facebook/vit-msn-small", "yolos": "hustvl/yolos-tiny", "whisper": "openai/whisper-tiny.en", From 8ad2e3adfe42883cfb2e3fd15750e42763fa7481 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Thu, 14 Nov 2024 19:58:47 +0000 Subject: [PATCH 17/23] Code formatting --- optimum/exporters/tasks.py | 7 ++++++- optimum/onnxruntime/runs/__init__.py | 6 +++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index e4b4b1e445..5382991ff0 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -209,7 +209,12 @@ class TasksManager: "feature-extraction": "AutoModel", "fill-mask": "AutoModelForMaskedLM", "image-classification": "AutoModelForImageClassification", - "image-segmentation": ("AutoModelForImageSegmentation", "AutoModelForSemanticSegmentation", "AutoModelForInstanceSegmentation", "AutoModelForUniversalSegmentation"), + "image-segmentation": ( + "AutoModelForImageSegmentation", + "AutoModelForSemanticSegmentation", + "AutoModelForInstanceSegmentation", + "AutoModelForUniversalSegmentation", + ), "image-to-image": "AutoModelForImageToImage", "image-to-text": "AutoModelForVision2Seq", "mask-generation": "AutoModel", diff --git a/optimum/onnxruntime/runs/__init__.py b/optimum/onnxruntime/runs/__init__.py index 1d98294934..d21db2a4ac 100644 --- a/optimum/onnxruntime/runs/__init__.py +++ b/optimum/onnxruntime/runs/__init__.py @@ -110,9 +110,9 @@ def __init__(self, run_config): model_class = FeaturesManager.get_model_class_for_feature(get_autoclass_name(self.task)) self.torch_model = model_class.from_pretrained(run_config["model_name_or_path"]) - self.return_body[ - "model_type" - ] = self.torch_model.config.model_type # return_body is initialized in parent class + self.return_body["model_type"] = ( + self.torch_model.config.model_type + ) # return_body is initialized in parent class def _launch_time(self, trial): batch_size = trial.suggest_categorical("batch_size", self.batch_sizes) From 55a19cb724f515b860b865b5c3390a7e9dc5ba9f Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Thu, 14 Nov 2024 19:59:10 +0000 Subject: [PATCH 18/23] Add maskformer to list of supported models --- optimum/onnxruntime/modeling_ort.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/onnxruntime/modeling_ort.py b/optimum/onnxruntime/modeling_ort.py index f1f7b2fe37..a55eb064fa 100644 --- a/optimum/onnxruntime/modeling_ort.py +++ b/optimum/onnxruntime/modeling_ort.py @@ -1784,7 +1784,7 @@ def forward( @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) class ORTModelForSemanticSegmentation(ORTModel): """ - ONNX Model for semantic-segmentation, with an all-MLP decode head on top e.g. for ADE20k, CityScapes. This class officially supports segformer. + ONNX Model for semantic-segmentation, with an all-MLP decode head on top e.g. for ADE20k, CityScapes. This class officially supports maskformer, segformer. """ auto_model_class = AutoModelForSemanticSegmentation From fd15bd3e0dbcd6bb6950c46cdd69c34a53975e50 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Thu, 14 Nov 2024 20:05:03 +0000 Subject: [PATCH 19/23] Formatting --- optimum/onnxruntime/runs/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/optimum/onnxruntime/runs/__init__.py b/optimum/onnxruntime/runs/__init__.py index d21db2a4ac..1d98294934 100644 --- a/optimum/onnxruntime/runs/__init__.py +++ b/optimum/onnxruntime/runs/__init__.py @@ -110,9 +110,9 @@ def __init__(self, run_config): model_class = FeaturesManager.get_model_class_for_feature(get_autoclass_name(self.task)) self.torch_model = model_class.from_pretrained(run_config["model_name_or_path"]) - self.return_body["model_type"] = ( - self.torch_model.config.model_type - ) # return_body is initialized in parent class + self.return_body[ + "model_type" + ] = self.torch_model.config.model_type # return_body is initialized in parent class def _launch_time(self, trial): batch_size = trial.suggest_categorical("batch_size", self.batch_sizes) From 7f0cb92ced010bae2ad448916bf620c3b965da5a Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 18 Dec 2024 17:20:43 +0100 Subject: [PATCH 20/23] fix typo --- docs/source/exporters/onnx/overview.mdx | 2 +- optimum/exporters/onnx/model_configs.py | 2 +- optimum/exporters/tasks.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/exporters/onnx/overview.mdx b/docs/source/exporters/onnx/overview.mdx index a434094046..b5129c23f2 100644 --- a/docs/source/exporters/onnx/overview.mdx +++ b/docs/source/exporters/onnx/overview.mdx @@ -66,7 +66,7 @@ Supported architectures from [🤗 Transformers](https://huggingface.co/docs/tra - M2-M100 - Marian - MarkupLM -- Maskformer +- MaskFormer - MBart - MGP-STR - Mistral diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 5c3269e193..771c9de70f 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -986,7 +986,7 @@ class MobileNetV2OnnxConfig(MobileNetV1OnnxConfig): pass -class MaskformerOnnxConfig(ViTOnnxConfig): +class MaskFormerOnnxConfig(ViTOnnxConfig): # torch.onnx.errors.UnsupportedOperatorError: Exporting the operator 'aten::einsum' to ONNX opset version 11 is not supported. # Support for this operator was added in version 12, try exporting with this version. DEFAULT_ONNX_OPSET = 12 diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index ab48154dda..bd8fd1de15 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -833,7 +833,7 @@ class TasksManager: "maskformer": supported_tasks_mapping( "feature-extraction", "image-segmentation", - onnx="MaskformerOnnxConfig", + onnx="MaskFormerOnnxConfig", ), "mbart": supported_tasks_mapping( "feature-extraction", From 7a2e94a12036a8e8d9172fbf4638f6c976d13600 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 18 Dec 2024 17:21:48 +0100 Subject: [PATCH 21/23] remove vit-mae masked-im task --- optimum/exporters/tasks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index bd8fd1de15..b09ec20a8c 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -1205,7 +1205,6 @@ class TasksManager: ), "vit-mae": supported_tasks_mapping( "feature-extraction", - "masked-im", onnx="VitMAEOnnxConfig", ), "vit-msn": supported_tasks_mapping( From 01929b2e8773bfe11791e12f6ebbf7ce5db3c8a9 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 18 Dec 2024 17:23:13 +0100 Subject: [PATCH 22/23] remove vit-msn masked-im task --- optimum/exporters/tasks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index b09ec20a8c..7cb5a31d2d 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -1210,7 +1210,6 @@ class TasksManager: "vit-msn": supported_tasks_mapping( "feature-extraction", "image-classification", - "masked-im", onnx="VitMSNOnnxConfig", ), "vits": supported_tasks_mapping( From 3fa346c21f09c9333663130c784a8f813fa3ac8c Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 18 Dec 2024 17:37:28 +0100 Subject: [PATCH 23/23] fix output names for maskformer export --- optimum/exporters/onnx/model_configs.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 771c9de70f..4c5a727a18 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -1001,6 +1001,12 @@ def outputs(self) -> Dict[str, Dict[int, str]]: else: return super().outputs + @property + def torch_to_onnx_output_map(self) -> Dict[str, str]: + return { + "transformer_decoder_last_hidden_state": "last_hidden_state", + } + class DonutSwinOnnxConfig(ViTOnnxConfig): DEFAULT_ONNX_OPSET = 11