From 82ccd483ce15434576d258b17cf73f2756eb9e6b Mon Sep 17 00:00:00 2001
From: Yewon Lim
Date: Mon, 1 Sep 2025 21:10:16 +0900
Subject: [PATCH 1/3] fix: gas for gemma fixed

---
 src/transformers/models/gemma3/modular_gemma3.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/transformers/models/gemma3/modular_gemma3.py b/src/transformers/models/gemma3/modular_gemma3.py
index 6e06671ea0bb..9752decd8321 100644
--- a/src/transformers/models/gemma3/modular_gemma3.py
+++ b/src/transformers/models/gemma3/modular_gemma3.py
@@ -869,6 +869,8 @@ def forward(
 
 
 class Gemma3ForConditionalGeneration(PaliGemmaForConditionalGeneration):
+    # we are filtering the logits/labels so we shouldn't divide the loss based on num_items_in_batch
+    accepts_loss_kwargs = False
     @auto_docstring
     def forward(
         self,

From 552267e9ab16e16b6f3daa01fc2549599e51d60d Mon Sep 17 00:00:00 2001
From: Yewon Lim
Date: Wed, 3 Sep 2025 15:26:37 +0900
Subject: [PATCH 2/3] feat: run fix-copies

---
 src/transformers/models/gemma3/modeling_gemma3.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/transformers/models/gemma3/modeling_gemma3.py b/src/transformers/models/gemma3/modeling_gemma3.py
index 83a1283b56e2..1ba2e5369d0e 100644
--- a/src/transformers/models/gemma3/modeling_gemma3.py
+++ b/src/transformers/models/gemma3/modeling_gemma3.py
@@ -969,6 +969,8 @@ class Gemma3ForConditionalGeneration(Gemma3PreTrainedModel, GenerationMixin):
         "^language_model.lm_head": "lm_head",
     }
     _tied_weights_keys = ["lm_head.weight"]
+    # we are filtering the logits/labels so we shouldn't divide the loss based on num_items_in_batch
+    accepts_loss_kwargs = False
 
     def __init__(self, config: Gemma3Config):
         super().__init__(config)

From 7d503965913ba5355759ffbba8b5cf171e38f29c Mon Sep 17 00:00:00 2001
From: Yewon Lim
Date: Wed, 3 Sep 2025 15:39:06 +0900
Subject: [PATCH 3/3] feat: added issue label

---
 src/transformers/models/gemma3/modeling_gemma3.py | 1 +
 src/transformers/models/gemma3/modular_gemma3.py  | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/src/transformers/models/gemma3/modeling_gemma3.py b/src/transformers/models/gemma3/modeling_gemma3.py
index c303bb390d95..c00b22156467 100644
--- a/src/transformers/models/gemma3/modeling_gemma3.py
+++ b/src/transformers/models/gemma3/modeling_gemma3.py
@@ -970,6 +970,7 @@ class Gemma3ForConditionalGeneration(Gemma3PreTrainedModel, GenerationMixin):
     }
     _tied_weights_keys = ["lm_head.weight"]
     # we are filtering the logits/labels so we shouldn't divide the loss based on num_items_in_batch
+    # Fix: https://github.com/huggingface/transformers/issues/40564
     accepts_loss_kwargs = False
 
     def __init__(self, config: Gemma3Config):

diff --git a/src/transformers/models/gemma3/modular_gemma3.py b/src/transformers/models/gemma3/modular_gemma3.py
index 9752decd8321..18f10fc3ad3d 100644
--- a/src/transformers/models/gemma3/modular_gemma3.py
+++ b/src/transformers/models/gemma3/modular_gemma3.py
@@ -870,7 +870,9 @@ def forward(
 
 class Gemma3ForConditionalGeneration(PaliGemmaForConditionalGeneration):
     # we are filtering the logits/labels so we shouldn't divide the loss based on num_items_in_batch
+    # Fix: https://github.com/huggingface/transformers/issues/40564
     accepts_loss_kwargs = False
+
     @auto_docstring
     def forward(
         self,
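
For context on the bug this series fixes: a minimal sketch, in plain PyTorch with illustrative tensors, of why dividing a summed loss by a num_items_in_batch counted before the model filters its logits/labels mis-scales the result. The shapes, the "keep" mask, and the variable names are assumptions for illustration, not Gemma3's actual filtering logic or the Trainer's API.

# Minimal sketch of the mis-scaling the patch avoids. Illustrative tensors
# and a hypothetical "keep" mask only; not the real Gemma3/Trainer code path.
import torch
import torch.nn.functional as F

seq_len, vocab = 6, 8
logits = torch.randn(seq_len, vocab)
labels = torch.tensor([3, 1, -100, 2, 5, 4])  # -100 marks an ignored position

# The count a trainer would take on the *unfiltered* labels: 5 real targets.
num_items_in_batch = (labels != -100).sum()

# The model then drops further positions (e.g. around image tokens); here
# the last two, so only 3 targets actually reach the loss.
keep = torch.tensor([True, True, True, True, False, False])
filtered_logits, filtered_labels = logits[keep], labels[keep]

summed = F.cross_entropy(
    filtered_logits, filtered_labels, ignore_index=-100, reduction="sum"
)
mis_scaled = summed / num_items_in_batch                  # divides by 5
plain_mean = summed / (filtered_labels != -100).sum()     # divides by 3

print(f"mis-scaled: {mis_scaled.item():.4f}  plain mean: {plain_mean.item():.4f}")

With accepts_loss_kwargs = False, the Trainer stops forwarding num_items_in_batch into the model's loss, so the loss falls back to a plain mean over the tokens the model actually kept.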