diff --git a/src/transformers/models/auto/configuration_auto.py b/src/transformers/models/auto/configuration_auto.py index d7bf78fefe87..9b3f30085310 100644 --- a/src/transformers/models/auto/configuration_auto.py +++ b/src/transformers/models/auto/configuration_auto.py @@ -399,6 +399,7 @@ ("zamba", "ZambaConfig"), ("zamba2", "Zamba2Config"), ("zoedepth", "ZoeDepthConfig"), + ("mobilenet_v5", "MobileNetV5Config"), ] ) @@ -804,6 +805,7 @@ ("zamba", "Zamba"), ("zamba2", "Zamba2"), ("zoedepth", "ZoeDepth"), + ("mobilenet_v5", "MobileNetV5"), ] ) diff --git a/src/transformers/models/auto/image_processing_auto.py b/src/transformers/models/auto/image_processing_auto.py index 64666456075e..f6401ab91944 100644 --- a/src/transformers/models/auto/image_processing_auto.py +++ b/src/transformers/models/auto/image_processing_auto.py @@ -176,6 +176,7 @@ ("xclip", ("CLIPImageProcessor", "CLIPImageProcessorFast")), ("yolos", ("YolosImageProcessor", "YolosImageProcessorFast")), ("zoedepth", ("ZoeDepthImageProcessor", "ZoeDepthImageProcessorFast")), + ("mobilenet_v5", ("MobileNetV5ImageProcessor",)), ] ) @@ -664,4 +665,7 @@ def register( ) -__all__ = ["IMAGE_PROCESSOR_MAPPING", "AutoImageProcessor"] +__all__ = [ + "IMAGE_PROCESSOR_MAPPING", + "AutoImageProcessor", +] diff --git a/src/transformers/models/auto/modeling_auto.py b/src/transformers/models/auto/modeling_auto.py index 075e8e31f15b..49ad985350ae 100644 --- a/src/transformers/models/auto/modeling_auto.py +++ b/src/transformers/models/auto/modeling_auto.py @@ -368,6 +368,7 @@ ("yoso", "YosoModel"), ("zamba", "ZambaModel"), ("zamba2", "Zamba2Model"), + ("mobilenet_v5", "MobileNetV5Model"), ] ) diff --git a/src/transformers/models/fsmt/modeling_fsmt.py b/src/transformers/models/fsmt/modeling_fsmt.py index 9e3000ad1338..058c7e0c944b 100644 --- a/src/transformers/models/fsmt/modeling_fsmt.py +++ b/src/transformers/models/fsmt/modeling_fsmt.py @@ -215,7 +215,7 @@ def _prepare_fsmt_decoder_inputs( @auto_docstring -class 
PretrainedFSMTModel(PreTrainedModel): +class PreTrainedFSMTModel(PreTrainedModel): config_class = FSMTConfig base_model_prefix = "model" @@ -908,7 +908,7 @@ def _get_shape(t): @auto_docstring -class FSMTModel(PretrainedFSMTModel): +class FSMTModel(PreTrainedFSMTModel): _tied_weights_keys = ["decoder.embed_tokens.weight", "decoder.output_projection.weight"] def __init__(self, config: FSMTConfig): @@ -1067,7 +1067,7 @@ def set_output_embeddings(self, value): The FSMT Model with a language modeling head. Can be used for summarization. """ ) -class FSMTForConditionalGeneration(PretrainedFSMTModel, GenerationMixin): +class FSMTForConditionalGeneration(PreTrainedFSMTModel, GenerationMixin): base_model_prefix = "model" _tied_weights_keys = ["decoder.embed_tokens.weight", "decoder.output_projection.weight"] @@ -1285,4 +1285,4 @@ def forward( return super().forward(positions) -__all__ = ["FSMTForConditionalGeneration", "FSMTModel", "PretrainedFSMTModel"] +__all__ = ["FSMTForConditionalGeneration", "FSMTModel", "PreTrainedFSMTModel"] diff --git a/src/transformers/models/mobilenet_v5/__init__.py b/src/transformers/models/mobilenet_v5/__init__.py new file mode 100644 index 000000000000..a414e0d02d75 --- /dev/null +++ b/src/transformers/models/mobilenet_v5/__init__.py @@ -0,0 +1,4 @@ +# Init file for MobileNetV5 model package +from .configuration_mobilenet_v5 import * +from .image_processing_mobilenet_v5 import * +from .modeling_mobilenet_v5 import * diff --git a/src/transformers/models/mobilenet_v5/configuration_mobilenet_v5.py b/src/transformers/models/mobilenet_v5/configuration_mobilenet_v5.py new file mode 100644 index 000000000000..386a09f6f082 --- /dev/null +++ b/src/transformers/models/mobilenet_v5/configuration_mobilenet_v5.py @@ -0,0 +1,43 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""MobileNetV5 model configuration""" + +from ...configuration_utils import PretrainedConfig +from ...utils import logging + +logger = logging.get_logger(__name__) + +class MobileNetV5Config(PretrainedConfig): + """ + This is the configuration class to store the configuration of a [`MobileNetV5Model`]. + It is used to instantiate a MobileNetV5 model according to the specified arguments, defining the model architecture. + Instantiating a configuration with the defaults will yield a minimal configuration. + + Example: + >>> from transformers import MobileNetV5Config, MobileNetV5Model + >>> config = MobileNetV5Config() + >>> model = MobileNetV5Model(config) + """ + model_type = "mobilenet_v5" + + def __init__( + self, + num_channels=3, + image_size=224, + num_classes=1000, + **kwargs, + ): + super().__init__(**kwargs) + self.num_channels = num_channels + self.image_size = image_size + self.num_classes = num_classes + # TODO: Add more architecture-specific parameters when implementing full support \ No newline at end of file diff --git a/src/transformers/models/mobilenet_v5/image_processing_mobilenet_v5.py b/src/transformers/models/mobilenet_v5/image_processing_mobilenet_v5.py new file mode 100644 index 000000000000..a00b4b0df863 --- /dev/null +++ b/src/transformers/models/mobilenet_v5/image_processing_mobilenet_v5.py @@ -0,0 +1,31 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""MobileNetV5 image processor stub implementation""" + +from ...image_processing_utils import BaseImageProcessor +from ...utils import logging + +logger = logging.get_logger(__name__) + +class MobileNetV5ImageProcessor(BaseImageProcessor): + """ + Minimal stub for MobileNetV5 image processor. Does not implement real preprocessing logic. + """ + model_input_names = ["pixel_values"] + + def __init__(self, **kwargs): + super().__init__(**kwargs) + logger.warning("MobileNetV5ImageProcessor is a stub and does not perform real preprocessing.") + + def preprocess(self, images, **kwargs): + # Stub: just return input as is + return images \ No newline at end of file diff --git a/src/transformers/models/mobilenet_v5/modeling_mobilenet_v5.py b/src/transformers/models/mobilenet_v5/modeling_mobilenet_v5.py new file mode 100644 index 000000000000..341a3a86e128 --- /dev/null +++ b/src/transformers/models/mobilenet_v5/modeling_mobilenet_v5.py @@ -0,0 +1,35 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""MobileNetV5 model stub implementation""" + +from ...modeling_utils import PreTrainedModel +from .configuration_mobilenet_v5 import MobileNetV5Config +import torch.nn as nn + +class MobileNetV5Model(PreTrainedModel): + """ + This class provides a minimal stub for the MobileNetV5 model architecture. + Currently, it does not implement any real logic and serves only to avoid 'Unknown Model' errors. + Contributions for a full implementation are welcome! + """ + config_class = MobileNetV5Config + + def __init__(self, config: MobileNetV5Config): + super().__init__(config) + # Minimal stub: a single dummy layer + self.dummy = nn.Identity() + + def forward(self, pixel_values=None, **kwargs): + # This is a stub. Real implementation required for actual use. + if pixel_values is None: + raise ValueError("pixel_values must be provided for MobileNetV5Model (stub)") + return self.dummy(pixel_values) \ No newline at end of file diff --git a/utils/check_repo.py b/utils/check_repo.py index 0487e1def262..bcf6c35d1da5 100644 --- a/utils/check_repo.py +++ b/utils/check_repo.py @@ -1000,7 +1000,7 @@ def find_all_documented_objects() -> list[str]: "LineByLineWithSOPTextDataset", "NerPipeline", "PretrainedBartModel", - "PretrainedFSMTModel", + "PreTrainedFSMTModel", "SingleSentenceClassificationProcessor", "SquadDataTrainingArguments", "SquadDataset",