Changes from all commits (23 commits)
bffa963  Automatically update can_return_tuple/check_model_inputs wrapped retu… (tomaarsen, Jan 23, 2026)
6762069  Add check_decorator_return_types.py script to check against 'tuple | X' (tomaarsen, Jan 23, 2026)
fa9caf7  Run checking script & fix existing typings (tomaarsen, Jan 23, 2026)
abaca9c  Fix blt by removing check_model_inputs; matching other classes (tomaarsen, Jan 23, 2026)
3652612  Use can_return_tuple on altclip (tomaarsen, Jan 23, 2026)
9d9666a  Add docstring for check_model_inputs / can_return_tuple (tomaarsen, Jan 23, 2026)
99d5deb  Import ParamSpec via generic as we're in Python 3.10+ (tomaarsen, Jan 23, 2026)
6ad9269  Add check_decorator_return_types to fix-repo & check-repo (tomaarsen, Jan 23, 2026)
a9afbdb  Remove some dead code (tomaarsen, Jan 23, 2026)
aa97fc6  Update _decorator_name to not check for our targets yet (tomaarsen, Jan 23, 2026)
f71a072  Use src/transformers/models path instead (tomaarsen, Jan 23, 2026)
63155b6  Revert "Use src/transformers/models path instead" (tomaarsen, Jan 23, 2026)
864a6a7  Use src/transformers/models path instead (tomaarsen, Jan 23, 2026)
456ee32  Simplify _is_none_annotation (tomaarsen, Jan 23, 2026)
3b0808e  Explain why super() is skipped (tomaarsen, Jan 23, 2026)
dbd43d6  Add comments for ParamSpec/TypeVar linking to Python docs (tomaarsen, Jan 23, 2026)
688be4b  Make style (tomaarsen, Jan 23, 2026)
3fd9571  Move decorator typing comments around (tomaarsen, Jan 23, 2026)
155fad7  Merge branch 'main' into feat/auto_decorator_return_typing (tomaarsen, Jan 30, 2026)
ea9df1d  Merge branch 'main' into feat/auto_decorator_return_typing (tomaarsen, Feb 20, 2026)
bc81a60  Rerun utils/check_decorator_return_types.py (tomaarsen, Feb 20, 2026)
b05bc5d  Update incorrect typings on modular classes that inherit decorators (tomaarsen, Feb 20, 2026)
e3da0ce  Merge branch 'main' into feat/auto_decorator_return_typing (tomaarsen, Mar 2, 2026)
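Taken together, these commits add a repo-consistency script, utils/check_decorator_return_types.py, that normalizes the return annotations of methods wrapped by decorators such as @can_return_tuple: since the decorator itself handles the optional tuple conversion, the annotation should name only the ModelOutput class, not `tuple | X`. As a rough illustration of the check (a hypothetical AST-based sketch, not the script's actual code; every helper name below is made up):

```python
# Hypothetical sketch of the kind of check utils/check_decorator_return_types.py
# performs. The real script does considerably more (a --fix_and_overwrite mode,
# modular files, skipping super() calls, decorator-name resolution, ...).
import ast

TARGET_DECORATORS = {"can_return_tuple", "check_model_inputs"}


def _decorator_names(func: ast.FunctionDef):
    """Yield the plain name of each decorator applied to the function."""
    for dec in func.decorator_list:
        if isinstance(dec, ast.Name):
            yield dec.id
        elif isinstance(dec, ast.Attribute):
            yield dec.attr


def _mentions_tuple(node: ast.expr) -> bool:
    """True for a bare `tuple` or a subscripted `tuple[...]` in an annotation."""
    if isinstance(node, ast.Subscript):
        node = node.value
    return isinstance(node, ast.Name) and node.id == "tuple"


def _offending_return(func: ast.FunctionDef) -> bool:
    """True for annotations like `tuple | X`, `tuple[...] | X`, or `X | tuple`."""
    ann = func.returns
    if not (isinstance(ann, ast.BinOp) and isinstance(ann.op, ast.BitOr)):
        return False
    return _mentions_tuple(ann.left) or _mentions_tuple(ann.right)


def check_source(path: str, source: str) -> list[str]:
    """Report decorated functions whose return annotation still unions `tuple`."""
    problems = []
    for node in ast.walk(ast.parse(source)):
        if isinstance(node, ast.FunctionDef):
            if TARGET_DECORATORS & set(_decorator_names(node)) and _offending_return(node):
                problems.append(f"{path}:{node.lineno}: {node.name}")
    return problems
```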
.circleci/config.yml (1 addition, 0 deletions)

@@ -190,6 +190,7 @@ jobs:
       - run: python utils/check_config_docstrings.py
       - run: python utils/check_config_attributes.py
       - run: python utils/check_doctest_list.py
+      - run: python utils/check_decorator_return_types.py
       - run: python utils/update_metadata.py --check-only
       - run: python utils/add_dates.py --check-only
       - run: >
.github/workflows/pr-repo-consistency-bot.yml (2 additions, 0 deletions)

@@ -170,6 +170,7 @@ jobs:
           cp utils/check_pipeline_typing.py pr-repo/utils/check_pipeline_typing.py
           cp utils/check_doctest_list.py pr-repo/utils/check_doctest_list.py
           cp utils/check_docstrings.py pr-repo/utils/check_docstrings.py
+          cp utils/check_decorator_return_types.py pr-repo/utils/check_decorator_return_types.py
           cp utils/add_dates.py pr-repo/utils/add_dates.py

       - name: Run repo consistency checks with trusted script
@@ -197,6 +198,7 @@ jobs:
           python utils/check_pipeline_typing.py --fix_and_overwrite
           python utils/check_doctest_list.py --fix_and_overwrite
           python utils/check_docstrings.py --fix_and_overwrite
+          python utils/check_decorator_return_types.py --fix_and_overwrite
           python utils/add_dates.py

       # Check if there are changes
CONTRIBUTING.md (1 addition, 1 deletion)

@@ -193,7 +193,7 @@ The library has 400+ models with many established patterns:
 - Search for similar models (e.g., other vision-language models)
 - Reuse attention mechanisms, layer implementations, and processing patterns
 - Check models like LLaVA, Idefics2, Fuyu for vision-language patterns
-- Use provided decorators like (`auto_docstring`, `can_return_tuple`, `check_model_inputs` and `_can_record_outputs`) where relevant.
+- Use provided decorators like (`auto_docstring`, `can_return_tuple`, `capture_outputs` and `_can_record_outputs`) where relevant.
 - Don't reinvent the wheel

☐ **7. Run quality checks and read the output**
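The decorators named in that checklist item are applied to model `forward` methods, and the convention this PR enforces is visible throughout the model diffs below: the annotation names only the output class, never `tuple | X`. A hypothetical minimal example of the style (a sketch; `can_return_tuple` and `BaseModelOutput` are real transformers utilities, though the exact import path may differ by version, and `MyTinyModel` is made up):

```python
# Hypothetical model stub showing the annotation style this PR enforces.
# `MyTinyModel` is invented for illustration; it is not a transformers class.
import torch
from transformers.modeling_outputs import BaseModelOutput
from transformers.utils.generic import can_return_tuple


class MyTinyModel(torch.nn.Module):
    @can_return_tuple
    def forward(self, hidden_states: torch.Tensor) -> BaseModelOutput:
        # Annotated as `BaseModelOutput`, not `tuple | BaseModelOutput`:
        # the decorator supplies the tuple conversion when a tuple is requested.
        return BaseModelOutput(last_hidden_state=hidden_states)
```

Calling such a model with `return_dict=False` is expected to yield the tuple form, with no `tuple` appearing in the visible signature.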
Makefile (2 additions, 0 deletions)

@@ -41,6 +41,7 @@ check-repo:
 	-python utils/check_config_docstrings.py
 	-python utils/check_config_attributes.py
 	-python utils/check_doctest_list.py
+	-python utils/check_decorator_return_types.py
 	-python utils/update_metadata.py --check-only
 	-python utils/add_dates.py --check-only
 	-@{ \
@@ -62,6 +63,7 @@ fix-repo: style
 	-python utils/check_pipeline_typing.py --fix_and_overwrite
 	-python utils/check_doctest_list.py --fix_and_overwrite
 	-python utils/check_docstrings.py --fix_and_overwrite
+	-python utils/check_decorator_return_types.py --fix_and_overwrite
 	-python utils/add_dates.py
src/transformers/models/afmoe/modeling_afmoe.py (1 addition, 1 deletion)

@@ -572,7 +572,7 @@ def forward(
         cache_position: torch.LongTensor | None = None,
         use_cache: bool | None = None,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> tuple | MoeModelOutputWithPast:
+    ) -> MoeModelOutputWithPast:
         if (input_ids is None) ^ (inputs_embeds is not None):
             raise ValueError("You must specify exactly one of input_ids or inputs_embeds")
src/transformers/models/afmoe/modular_afmoe.py (1 addition, 1 deletion)

@@ -394,7 +394,7 @@ def forward(
         cache_position: torch.LongTensor | None = None,
         use_cache: bool | None = None,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> tuple | MoeModelOutputWithPast:
+    ) -> MoeModelOutputWithPast:
         if (input_ids is None) ^ (inputs_embeds is not None):
             raise ValueError("You must specify exactly one of input_ids or inputs_embeds")
src/transformers/models/aimv2/modeling_aimv2.py (2 additions, 2 deletions)

@@ -616,7 +616,7 @@ def get_text_features(
         attention_mask: torch.Tensor | None = None,
         position_ids: torch.Tensor | None = None,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> tuple | BaseModelOutputWithPooling:
+    ) -> BaseModelOutputWithPooling:
         r"""
         Examples:

@@ -651,7 +651,7 @@ def get_image_features(
         pixel_values: torch.FloatTensor,
         interpolate_pos_encoding: bool = False,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> tuple | BaseModelOutputWithPooling:
+    ) -> BaseModelOutputWithPooling:
         r"""
         Examples:
src/transformers/models/albert/modeling_albert.py (7 additions, 7 deletions)

@@ -392,7 +392,7 @@ def forward(
         position_ids: torch.LongTensor | None = None,
         inputs_embeds: torch.FloatTensor | None = None,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> BaseModelOutputWithPooling | tuple:
+    ) -> BaseModelOutputWithPooling:
         if (input_ids is None) ^ (inputs_embeds is not None):
             raise ValueError("You must specify exactly one of input_ids or inputs_embeds")

@@ -466,7 +466,7 @@ def forward(
         labels: torch.LongTensor | None = None,
         sentence_order_label: torch.LongTensor | None = None,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> AlbertForPreTrainingOutput | tuple:
+    ) -> AlbertForPreTrainingOutput:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@@ -595,7 +595,7 @@ def forward(
         inputs_embeds: torch.FloatTensor | None = None,
         labels: torch.LongTensor | None = None,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> MaskedLMOutput | tuple:
+    ) -> MaskedLMOutput:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@@ -687,7 +687,7 @@ def forward(
         inputs_embeds: torch.FloatTensor | None = None,
         labels: torch.LongTensor | None = None,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> SequenceClassifierOutput | tuple:
+    ) -> SequenceClassifierOutput:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@@ -769,7 +769,7 @@ def forward(
         inputs_embeds: torch.FloatTensor | None = None,
         labels: torch.LongTensor | None = None,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> TokenClassifierOutput | tuple:
+    ) -> TokenClassifierOutput:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@@ -826,7 +826,7 @@ def forward(
         start_positions: torch.LongTensor | None = None,
         end_positions: torch.LongTensor | None = None,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> AlbertForPreTrainingOutput | tuple:
+    ) -> AlbertForPreTrainingOutput:
         outputs = self.albert(
             input_ids=input_ids,
             attention_mask=attention_mask,
@@ -893,7 +893,7 @@ def forward(
         inputs_embeds: torch.FloatTensor | None = None,
         labels: torch.LongTensor | None = None,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> AlbertForPreTrainingOutput | tuple:
+    ) -> AlbertForPreTrainingOutput:
         r"""
         input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
             Indices of input sequence tokens in the vocabulary.
src/transformers/models/align/modeling_align.py (6 additions, 6 deletions)

@@ -771,7 +771,7 @@ def forward(
         output_hidden_states: bool | None = False,
         return_dict: bool | None = True,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> tuple[torch.Tensor] | BaseModelOutput:
+    ) -> BaseModelOutput:
         all_hidden_states = () if output_hidden_states else None
         all_self_attentions = () if output_attentions else None

@@ -897,7 +897,7 @@ def forward(
         output_hidden_states: bool | None = None,
         return_dict: bool | None = None,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> tuple | BaseModelOutputWithPooling:
+    ) -> BaseModelOutputWithPooling:
         r"""
         Examples:

@@ -1010,7 +1010,7 @@ def forward(
         output_hidden_states: bool | None = None,
         return_dict: bool | None = None,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> tuple | BaseModelOutputWithPoolingAndNoAttention:
+    ) -> BaseModelOutputWithPoolingAndNoAttention:
         r"""
         Examples:

@@ -1104,7 +1104,7 @@ def get_text_features(
         position_ids: torch.Tensor | None = None,
         inputs_embeds: torch.Tensor | None = None,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> tuple | BaseModelOutputWithPooling:
+    ) -> BaseModelOutputWithPooling:
         r"""
         Examples:

@@ -1137,7 +1137,7 @@ def get_text_features(
     @auto_docstring
     def get_image_features(
         self, pixel_values: torch.FloatTensor, **kwargs: Unpack[TransformersKwargs]
-    ) -> tuple | BaseModelOutputWithPooling:
+    ) -> BaseModelOutputWithPooling:
         r"""
         Examples:

@@ -1173,7 +1173,7 @@ def forward(
         output_hidden_states: bool | None = None,
         return_dict: bool | None = None,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> tuple | AlignOutput:
+    ) -> AlignOutput:
         r"""
         return_loss (`bool`, *optional*):
             Whether or not to return the contrastive loss.
src/transformers/models/altclip/modeling_altclip.py (8 additions, 9 deletions)

@@ -384,7 +384,7 @@ def forward(
         output_hidden_states: bool | None = False,
         return_dict: bool | None = True,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> tuple[torch.Tensor] | BaseModelOutput:
+    ) -> BaseModelOutput:
         all_hidden_states = () if output_hidden_states else None
         all_self_attentions = () if output_attentions else None

@@ -607,7 +607,7 @@ def forward(
         output_hidden_states: bool | None = None,
         return_dict: bool | None = None,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> tuple | BaseModelOutput:
+    ) -> BaseModelOutput:
         r"""
         Args:
             inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
@@ -825,7 +825,7 @@ def forward(
         output_hidden_states: bool | None = None,
         return_dict: bool | None = None,
         interpolate_pos_encoding: bool | None = False,
-    ) -> tuple | BaseModelOutputWithPooling:
+    ) -> BaseModelOutputWithPooling:
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@@ -949,6 +949,7 @@ def get_input_embeddings(self):
     def set_input_embeddings(self, value):
         self.embeddings.word_embeddings = value

+    @can_return_tuple
Review comment from @tomaarsen (Member, Author), Jan 23, 2026:

    Reviewer note: There are only 2 actual code changes under src/transformers/models, and this is one of them. This class copies from CLAP, which uses @can_return_tuple, but this class did not. I've added it here.

    I also updated this class and the CLAP variant to remove the `return_dict = return_dict if return_dict is not None else self.config.use_return_dict` line, as that was just dead code.

Reply from a maintainer (Member):

    nice! Ideally would be great to start using check_model_inputs for pretrained models. Though it might require more manual work than current state of PR
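For context on the exchange above: a decorator in the spirit of `can_return_tuple` wraps `forward`, converts the returned `ModelOutput` to a plain tuple only when the caller asks for one, and is typed with `ParamSpec`/`TypeVar` so the wrapped signature survives for type checkers (the approach commits 99d5deb and dbd43d6 refer to). That is also why the explicit `return_dict` line removed in the hunk below was dead code. A simplified sketch under those assumptions, not the actual transformers implementation:

```python
# Simplified sketch of a `can_return_tuple`-style decorator. Illustrative only:
# the real implementation in transformers also consults `self.config` and
# handles more cases.
import functools
from collections.abc import Callable
from typing import ParamSpec, TypeVar

# See https://docs.python.org/3/library/typing.html#typing.ParamSpec
P = ParamSpec("P")
R = TypeVar("R")


def can_return_tuple_sketch(func: Callable[P, R]) -> Callable[P, R]:
    """Convert the returned ModelOutput to a plain tuple when return_dict=False."""

    @functools.wraps(func)
    def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
        return_dict = kwargs.pop("return_dict", True)
        output = func(*args, **kwargs)
        # ModelOutput subclasses expose to_tuple(); only convert on request.
        if not return_dict and hasattr(output, "to_tuple"):
            return output.to_tuple()
        return output

    return wrapper
```

Because the tuple escape hatch lives inside the decorator and is deliberately untyped, the visible annotation can name only `R`, the `ModelOutput` subclass, which is exactly the convention the PR's checker enforces.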

    @auto_docstring
    # Copied from transformers.models.clap.modeling_clap.ClapTextModel.forward
    def forward(
@@ -960,14 +961,12 @@ def forward(
         inputs_embeds: torch.Tensor | None = None,
         output_attentions: bool | None = None,
         output_hidden_states: bool | None = None,
-        return_dict: bool | None = None,
         **kwargs,
-    ) -> tuple[torch.Tensor] | BaseModelOutputWithPoolingAndCrossAttentions:
+    ) -> BaseModelOutputWithPoolingAndCrossAttentions:
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
         )
-        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
@@ -1054,7 +1053,7 @@ def forward(
         return_dict: bool | None = None,
         output_hidden_states: bool | None = None,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> tuple | BaseModelOutputWithPoolingAndProjection:
+    ) -> BaseModelOutputWithPoolingAndProjection:
         r"""
         Examples:

@@ -1149,7 +1148,7 @@ def get_text_features(
         position_ids: torch.Tensor | None = None,
         token_type_ids: torch.Tensor | None = None,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> tuple | BaseModelOutputWithPooling:
+    ) -> BaseModelOutputWithPooling:
         r"""
         Examples:

@@ -1184,7 +1183,7 @@ def get_image_features(
         pixel_values: torch.FloatTensor,
         interpolate_pos_encoding: bool = False,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> tuple | BaseModelOutputWithPooling:
+    ) -> BaseModelOutputWithPooling:
         r"""
         Examples:
src/transformers/models/aria/modeling_aria.py (3 additions, 3 deletions)

@@ -928,7 +928,7 @@ def get_image_features(
         vision_feature_layer: int = -1,
         output_hidden_states: bool | None = None,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> tuple | BaseModelOutputWithPooling:
+    ) -> BaseModelOutputWithPooling:
         patch_attention_mask = self._create_patch_attention_mask(pixel_mask)
         image_outputs = self.vision_tower(
             pixel_values,
@@ -985,7 +985,7 @@ def forward(
         use_cache: bool | None = None,
         cache_position: torch.LongTensor | None = None,
         **kwargs: Unpack[FlashAttentionKwargs],
-    ) -> tuple | AriaModelOutputWithPast:
+    ) -> AriaModelOutputWithPast:
         if inputs_embeds is None:
             inputs_embeds = self.get_input_embeddings()(input_ids)

@@ -1101,7 +1101,7 @@ def forward(
         logits_to_keep: int | torch.Tensor = 0,
         cache_position: torch.LongTensor | None = None,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> tuple | AriaCausalLMOutputWithPast:
+    ) -> AriaCausalLMOutputWithPast:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
src/transformers/models/aria/modular_aria.py (3 additions, 3 deletions)

@@ -1266,7 +1266,7 @@ def get_image_features(
         vision_feature_layer: int = -1,
         output_hidden_states: bool | None = None,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> tuple | BaseModelOutputWithPooling:
+    ) -> BaseModelOutputWithPooling:
         patch_attention_mask = self._create_patch_attention_mask(pixel_mask)
         image_outputs = self.vision_tower(
             pixel_values,
@@ -1297,7 +1297,7 @@ def forward(
         use_cache: bool | None = None,
         cache_position: torch.LongTensor | None = None,
         **kwargs: Unpack[FlashAttentionKwargs],
-    ) -> tuple | AriaModelOutputWithPast:
+    ) -> AriaModelOutputWithPast:
         if inputs_embeds is None:
             inputs_embeds = self.get_input_embeddings()(input_ids)

@@ -1376,7 +1376,7 @@ def forward(
         logits_to_keep: int | torch.Tensor = 0,
         cache_position: torch.LongTensor | None = None,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> tuple | AriaCausalLMOutputWithPast:
+    ) -> AriaCausalLMOutputWithPast:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
(file header not captured)

@@ -329,7 +329,7 @@ def forward(
         input_features: torch.Tensor,
         input_features_mask: torch.Tensor | None = None,
         **kwargs,
-    ) -> tuple | BaseModelOutputWithPooling:
+    ) -> BaseModelOutputWithPooling:
         r"""
         Args:
             input_features (`torch.FloatTensor` of shape `(batch_size, feature_size, sequence_length)`):
@@ -457,7 +457,7 @@ def get_audio_features(
         input_features: torch.FloatTensor,
         input_features_mask: torch.Tensor,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> tuple | BaseModelOutputWithPooling:
+    ) -> BaseModelOutputWithPooling:
         r"""
         input_features (`torch.FloatTensor`):
             Float values of mel features extracted from the raw speech waveform. Raw speech waveform can be
(file header not captured)

@@ -70,7 +70,7 @@ def forward(
         input_features: torch.Tensor,
         input_features_mask: torch.Tensor | None = None,
         **kwargs,
-    ) -> tuple | BaseModelOutputWithPooling:
+    ) -> BaseModelOutputWithPooling:
         r"""
         Args:
             input_features (`torch.FloatTensor` of shape `(batch_size, feature_size, sequence_length)`):
@@ -158,7 +158,7 @@ def get_audio_features(
         input_features: torch.FloatTensor,
         input_features_mask: torch.Tensor,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> tuple | BaseModelOutputWithPooling:
+    ) -> BaseModelOutputWithPooling:
         r"""
         input_features (`torch.FloatTensor`):
             Float values of mel features extracted from the raw speech waveform. Raw speech waveform can be
src/transformers/models/aya_vision/modeling_aya_vision.py (3 additions, 3 deletions)

@@ -191,7 +191,7 @@ def get_image_features(
         vision_feature_select_strategy: str | None = None,
         output_hidden_states: bool | None = None,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> tuple | BaseModelOutputWithPooling:
+    ) -> BaseModelOutputWithPooling:
         kwargs = {k: v for k, v in kwargs.items() if v is not None}
         # this is not memory efficient at all (output_hidden_states=True) will save all the hidden states.
         image_outputs = self.vision_tower(
@@ -257,7 +257,7 @@ def forward(
         use_cache: bool | None = None,
         cache_position: torch.LongTensor | None = None,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> tuple | AyaVisionModelOutputWithPast:
+    ) -> AyaVisionModelOutputWithPast:
         if (input_ids is None) ^ (inputs_embeds is not None):
             raise ValueError("You must specify exactly one of input_ids or inputs_embeds")

@@ -357,7 +357,7 @@ def forward(
         logits_to_keep: int | torch.Tensor = 0,
         image_sizes: torch.Tensor | None = None,
         **kwargs: Unpack[TransformersKwargs],
-    ) -> tuple | AyaVisionCausalLMOutputWithPast:
+    ) -> AyaVisionCausalLMOutputWithPast:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,