diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py
index 5d3d3145ef00..4c1c7eeb9434 100755
--- a/src/transformers/configuration_utils.py
+++ b/src/transformers/configuration_utils.py
@@ -224,7 +224,11 @@ class PreTrainedConfig(PushToHubMixin, RotaryEmbeddingConfigMixin):
     # Common attributes for all models
     output_hidden_states: bool | None = False
     return_dict: bool | None = True
-    dtype: Union[str, "torch.dtype"] | None = None
+    # NOTE: annotated as Any (rather than Union[str, "torch.dtype"] | None) so that pydantic can
+    # build a schema for classes that include PreTrainedConfig as a field without needing torch
+    # in the resolution namespace. The runtime type is still Union[str, torch.dtype] | None.
+    # See https://github.com/huggingface/transformers/issues/45070
+    dtype: Any = None
     chunk_size_feed_forward: int = 0
     is_encoder_decoder: bool = False
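For context on the failure mode the NOTE describes, here is a minimal sketch (not part of the patch; `OldStyleConfig`, `NewStyleConfig`, and `Settings` are hypothetical names standing in for `PreTrainedConfig` and a downstream model). When a dataclass-style config is used as a pydantic field, pydantic introspects its annotations, so a string forward reference to `torch.dtype` must be resolvable wherever the schema is built, while `Any` needs no resolution:

```python
# Minimal sketch of the schema-building failure, assuming pydantic v2.
# OldStyleConfig, NewStyleConfig, and Settings are hypothetical names used
# for illustration only.
from dataclasses import dataclass
from typing import Any

from pydantic import BaseModel


@dataclass
class OldStyleConfig:
    # Forward reference, as in the old annotation: pydantic must evaluate the
    # string, which requires torch in the resolution namespace.
    dtype: "str | torch.dtype | None" = None  # noqa: F821


@dataclass
class NewStyleConfig:
    # The new annotation: Any requires no resolution, so schema building
    # succeeds even without torch installed.
    dtype: Any = None


class Settings(BaseModel):
    config: NewStyleConfig  # schema builds fine


# Swapping NewStyleConfig for OldStyleConfig above leaves pydantic unable to
# resolve the forward reference, surfacing as a PydanticUndefinedAnnotation /
# "not fully defined" error (with a hint to call model_rebuild) -- the
# breakage the NOTE works around.
```

The trade-off, as the NOTE records, is that the precise type is no longer visible to static checkers; the comment documents the intended runtime type (`Union[str, torch.dtype] | None`) in its place.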