From 7ee7beca68a35228710b687dbe21ff5917a2ca99 Mon Sep 17 00:00:00 2001 From: Rudrendu Date: Thu, 9 Apr 2026 14:53:14 -0700 Subject: [PATCH 1/2] fix(qwen3_moe): correct return type annotation on Qwen3MoeSparseMoeBlock.forward --- src/transformers/models/qwen3_moe/modeling_qwen3_moe.py | 2 +- src/transformers/models/qwen3_moe/modular_qwen3_moe.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/models/qwen3_moe/modeling_qwen3_moe.py b/src/transformers/models/qwen3_moe/modeling_qwen3_moe.py index d63882215609..4dc2ad96c091 100644 --- a/src/transformers/models/qwen3_moe/modeling_qwen3_moe.py +++ b/src/transformers/models/qwen3_moe/modeling_qwen3_moe.py @@ -278,7 +278,7 @@ def __init__(self, config: Qwen3MoeConfig): self.experts = Qwen3MoeExperts(config) self.gate = Qwen3MoeTopKRouter(config) - def forward(self, hidden_states: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: batch_size, sequence_length, hidden_dim = hidden_states.shape hidden_states_reshaped = hidden_states.view(-1, hidden_dim) _, routing_weights, selected_experts = self.gate(hidden_states_reshaped) diff --git a/src/transformers/models/qwen3_moe/modular_qwen3_moe.py b/src/transformers/models/qwen3_moe/modular_qwen3_moe.py index cf8741aafe2d..0fd5b451959c 100644 --- a/src/transformers/models/qwen3_moe/modular_qwen3_moe.py +++ b/src/transformers/models/qwen3_moe/modular_qwen3_moe.py @@ -66,7 +66,7 @@ def __init__(self, config: Qwen3MoeConfig): self.experts = Qwen3MoeExperts(config) self.gate = Qwen3MoeTopKRouter(config) - def forward(self, hidden_states: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: batch_size, sequence_length, hidden_dim = hidden_states.shape hidden_states_reshaped = hidden_states.view(-1, hidden_dim) _, routing_weights, selected_experts = self.gate(hidden_states_reshaped) From f0cefbe5580beb35319811c0138cf83a4d0bdf3d Mon Sep 17 00:00:00 2001 From: Rudrendu Date: Fri, 10 Apr 2026 23:43:22 -0700 Subject: [PATCH 2/2] fix: propagate Qwen3MoeSparseMoeBlock forward return type fix to generated vl_moe and omni_moe files Built by Rudrendu Paul, developed with Claude Code --- .../models/qwen3_omni_moe/modeling_qwen3_omni_moe.py | 2 +- src/transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py b/src/transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py index 5141ffc388c8..22529635689e 100644 --- a/src/transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +++ b/src/transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py @@ -1415,7 +1415,7 @@ def __init__(self, config: Qwen3OmniMoeThinkerConfig): self.experts = Qwen3OmniMoeThinkerTextExperts(config) self.gate = Qwen3OmniMoeThinkerTextTopKRouter(config) - def forward(self, hidden_states: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: batch_size, sequence_length, hidden_dim = hidden_states.shape hidden_states_reshaped = hidden_states.view(-1, hidden_dim) _, routing_weights, selected_experts = self.gate(hidden_states_reshaped) diff --git a/src/transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py b/src/transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py index 6d4c68c1a752..7170645a45aa 100644 --- a/src/transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +++ b/src/transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py @@ -136,7 +136,7 @@ def __init__(self, config: Qwen3VLMoeTextConfig): self.experts = Qwen3VLMoeTextExperts(config) self.gate = Qwen3VLMoeTextTopKRouter(config) - def forward(self, hidden_states: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: batch_size, sequence_length, hidden_dim = hidden_states.shape hidden_states_reshaped = hidden_states.view(-1, hidden_dim) _, routing_weights, selected_experts = self.gate(hidden_states_reshaped)