From e7dfc6f782b516453b5e9a978292409b93d5d777 Mon Sep 17 00:00:00 2001
From: Ignazio De Santis
Date: Tue, 31 Mar 2026 03:13:21 +0800
Subject: [PATCH] fix(config): annotate PreTrainedConfig.dtype as Any to fix pydantic schema generation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes #45070.

PreTrainedConfig.dtype was annotated as Union[str, "torch.dtype"] | None.
When torch is only imported under TYPE_CHECKING, pydantic's schema builder
encounters the "torch.dtype" forward reference at runtime and fails with
PydanticUndefinedAnnotation: name 'torch' is not defined.

The annotation is changed to Any, which is semantically correct for
pydantic's purposes (the field accepts arbitrary values) and avoids the
forward-reference resolution failure. The runtime behaviour is unchanged —
dtype can still hold str or torch.dtype values.

As noted by @zucchini-nlp in the issue, this is the minimal fix.
---
 src/transformers/configuration_utils.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py
index 5d3d3145ef00..4c1c7eeb9434 100755
--- a/src/transformers/configuration_utils.py
+++ b/src/transformers/configuration_utils.py
@@ -224,7 +224,11 @@ class PreTrainedConfig(PushToHubMixin, RotaryEmbeddingConfigMixin):
     # Common attributes for all models
     output_hidden_states: bool | None = False
     return_dict: bool | None = True
-    dtype: Union[str, "torch.dtype"] | None = None
+    # NOTE: annotated as Any (rather than Union[str, "torch.dtype"] | None) so that pydantic can
+    # build a schema for classes that include PreTrainedConfig as a field without needing torch
+    # in the resolution namespace. The runtime type is still Union[str, torch.dtype] | None.
+    # See https://github.com/huggingface/transformers/issues/45070
+    dtype: Any = None
     chunk_size_feed_forward: int = 0
     is_encoder_decoder: bool = False