From 81d6ce3de8f4d646460f8ff0421adfa7baf2feba Mon Sep 17 00:00:00 2001 From: badaoui Date: Tue, 3 Mar 2026 09:05:54 +0000 Subject: [PATCH 1/7] qwen2 --- tests/models/qwen2/test_modeling_qwen2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/qwen2/test_modeling_qwen2.py b/tests/models/qwen2/test_modeling_qwen2.py index 333af0542ab9..ff522ea67d73 100644 --- a/tests/models/qwen2/test_modeling_qwen2.py +++ b/tests/models/qwen2/test_modeling_qwen2.py @@ -175,7 +175,7 @@ def test_model_450m_long_prompt_sdpa(self): @slow def test_speculative_generation(self): EXPECTED_TEXT_COMPLETION = ( - "My favourite condiment is 100% natural and organic, and I love to use it to make my own sauces." + "My favourite condiment is 100% natural, organic, gluten-free, vegan, and vegetarian. I have been making" ) prompt = "My favourite condiment is " tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-7B", use_fast=False) From eba6b6915c15ce5974de2aef6c958a6611120cb9 Mon Sep 17 00:00:00 2001 From: badaoui Date: Wed, 11 Mar 2026 16:44:10 +0000 Subject: [PATCH 2/7] merge --- .../models/qwen2_5_omni/modeling_qwen2_5_omni.py | 6 +++--- .../models/qwen2_5_omni/modular_qwen2_5_omni.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py b/src/transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py index 8b5fdae661ec..446d26616ff8 100644 --- a/src/transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +++ b/src/transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py @@ -2527,9 +2527,9 @@ def prepare_inputs_for_generation( ): model_inputs = super().prepare_inputs_for_generation( input_ids, - past_key_values, - attention_mask, - inputs_embeds, + past_key_values=past_key_values, + attention_mask=attention_mask, + inputs_embeds=inputs_embeds, use_cache=use_cache, thinker_reply_part=thinker_reply_part, input_text_ids=input_text_ids, diff --git a/src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py b/src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py index 293dd7978022..31262fe1ae33 100644 --- a/src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +++ b/src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py @@ -2445,9 +2445,9 @@ def prepare_inputs_for_generation( ): model_inputs = super().prepare_inputs_for_generation( input_ids, - past_key_values, - attention_mask, - inputs_embeds, + past_key_values=past_key_values, + attention_mask=attention_mask, + inputs_embeds=inputs_embeds, use_cache=use_cache, thinker_reply_part=thinker_reply_part, input_text_ids=input_text_ids, From 47b9cfb9ed39a5a205026e895220f5fe9e2006f0 Mon Sep 17 00:00:00 2001 From: badaoui Date: Tue, 3 Mar 2026 10:08:29 +0000 Subject: [PATCH 3/7] qwen_2_audio --- src/transformers/models/qwen2_audio/modeling_qwen2_audio.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/models/qwen2_audio/modeling_qwen2_audio.py b/src/transformers/models/qwen2_audio/modeling_qwen2_audio.py index f3f94c933881..76f9e5995cc0 100644 --- a/src/transformers/models/qwen2_audio/modeling_qwen2_audio.py +++ b/src/transformers/models/qwen2_audio/modeling_qwen2_audio.py @@ -788,11 +788,11 @@ def forward( audio_features_mask = audio_features_mask < audio_output_lengths[:, None] audio_features = audio_features[audio_features_mask] - n_audio_tokens = (input_ids == self.config.audio_token_id).sum().item() + n_audio_tokens = (input_ids == self.config.audio_token_id).sum() n_audio_features = audio_features.shape[0] torch_compilable_check( n_audio_tokens == n_audio_features, - f"Audio features and audio tokens do not match, tokens: {n_audio_tokens}, features: {n_audio_features}", + lambda: f"Audio features and audio tokens do not match, tokens: {n_audio_tokens}, features: {n_audio_features}", ) special_audio_mask = (input_ids == self.config.audio_token_id).to(inputs_embeds.device) special_audio_mask = special_audio_mask.unsqueeze(-1).expand_as(inputs_embeds) From e8dcd2fb1584bfa83743dd7ddd54950f8b54f73c Mon Sep 17 00:00:00 2001 From: badaoui Date: Tue, 10 Mar 2026 13:42:12 +0000 Subject: [PATCH 4/7] fix more --- docker/transformers-pytorch-amd-gpu/Dockerfile | 2 +- tests/models/llava/test_modeling_llava.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/transformers-pytorch-amd-gpu/Dockerfile b/docker/transformers-pytorch-amd-gpu/Dockerfile index ae75adb6807f..2c58491d686a 100644 --- a/docker/transformers-pytorch-amd-gpu/Dockerfile +++ b/docker/transformers-pytorch-amd-gpu/Dockerfile @@ -44,7 +44,7 @@ RUN git clone https://github.com/ROCm/flash-attention/ -b tridao && \ GPU_ARCHS="gfx942" python setup.py install # GPU_ARCHS builds for MI300, MI325 but not MI355: we would need to add `;gfx950` but it takes too long to build. -RUN python3 -m pip install --no-cache-dir einops +RUN python3 -m pip install --no-cache-dir einops blobfile num2words # timm is required for many vision models tests RUN python3 -m pip install --no-cache-dir timm diff --git a/tests/models/llava/test_modeling_llava.py b/tests/models/llava/test_modeling_llava.py index b7f18b597d6a..b0bcf5afbbbd 100644 --- a/tests/models/llava/test_modeling_llava.py +++ b/tests/models/llava/test_modeling_llava.py @@ -579,7 +579,7 @@ def test_tokenizer_integration(self): fast_tokenizer.add_tokens("", True) prompt = "<|im_start|>system\nAnswer the questions.<|im_end|><|im_start|>user\n\nWhat is shown in this image?<|im_end|><|im_start|>assistant\n" - EXPECTED_OUTPUT = ['<|im_start|>', 'sy', 'st', 'em', '\n', 'An', 'sw', 'er', ' ', 'the', ' ', 'qu', 'est', 'ions', '.', '<|im_end|>', '<|im_start|>', 'us', 'er', '\n', '', '\n', 'What', ' ', 'is', ' ', 'sh', 'own', ' ', 'in', ' ', 'th', 'is', ' ', 'im', 'age', '?', '<|im_end|>', '<|im_start|>', 'ass', 'ist', 'ant', '\n'] # fmt: skip + EXPECTED_OUTPUT = ['<|im_start|>', 'system', '\n', 'Answer', '▁the', '▁questions', '.', '<|im_end|>', '<|im_start|>', 'user', '\n', '', '\n', 'What', '▁is', '▁shown', '▁in', '▁this', '▁image', '?', '<|im_end|>', '<|im_start|>', 'ass', 'istant', '\n'] # fmt: skip self.assertEqual(slow_tokenizer.tokenize(prompt), EXPECTED_OUTPUT) self.assertEqual(fast_tokenizer.tokenize(prompt), EXPECTED_OUTPUT) From ec59b941504ce74a3e2f1931ce9d3de12088b53e Mon Sep 17 00:00:00 2001 From: badaoui Date: Wed, 11 Mar 2026 16:30:58 +0000 Subject: [PATCH 5/7] qwen2_5_vl --- .../qwen2_5_vl/test_modeling_qwen2_5_vl.py | 32 +++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py b/tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py index f6798fea15ae..38730ed19c10 100644 --- a/tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py +++ b/tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py @@ -503,9 +503,22 @@ def test_small_model_integration_test_batch(self): 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', ] # fmt: skip + expected_decoded_texts = Expectations( + { + (None, None): [ + 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', + 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', + ], + ("rocm", (9, 4)): [ + 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and gentle nature, which is often reflected', + 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and gentle nature, which is often reflected' + ], + } + ).get_expectation() # fmt: skip + self.assertEqual( self.processor.batch_decode(output, skip_special_tokens=True), - EXPECTED_DECODED_TEXT, + expected_decoded_texts, ) @slow @@ -553,6 +566,10 @@ def test_small_model_integration_test_batch_wo_image(self): 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', 'system\nYou are a helpful assistant.\nuser\nWho are you?\nassistant\nI am Qwen, an AI language model created by Alibaba Cloud. I am designed to assist with various tasks such as answering questions, providing information,' ], + ("rocm", (9, 4)): [ + 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and gentle nature, which is evident in', + 'system\nYou are a helpful assistant.\nuser\nWho are you?\nassistant\nI am Qwen, a large language model created by Alibaba Cloud. I am designed to assist with a wide range of tasks, from answering questions and' + ], ("xpu", None): [ 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', 'system\nYou are a helpful assistant.\nuser\nWho are you?\nassistant\nI am Qwen, a large language model created by Alibaba Cloud. I am designed to assist with a wide range of tasks, from answering questions and' @@ -594,8 +611,8 @@ def test_small_model_integration_test_batch_different_resolutions(self): 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and gentle nature, which is evident in', ], ("rocm", None): [ - 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', - 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\n addCriterion\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and gentle nature, which is', + 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and gentle nature, which is often reflected', + 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and gentle nature, which is evident in' ], ("xpu", None): [ 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', @@ -633,7 +650,7 @@ def test_small_model_integration_test_batch_flashatt2(self): expected_decoded_text = Expectations({ ("cuda", None): "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in", - ("rocm", (9, 4)): "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in", + ("rocm", (9, 4)): "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and gentle nature, which is evident in", ("xpu", None): "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in", }).get_expectation() # fmt: skip @@ -674,8 +691,8 @@ def test_small_model_integration_test_batch_wo_image_flashatt2(self): "system\nYou are a helpful assistant.\nuser\nWho are you?\nassistant\n�\n\n addCriterion\nI'm sorry, but I don't understand your question. Could you please provide more context or clarify what you're asking", ], ("rocm", (9, 4)): [ - 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', - "system\nYou are a helpful assistant.\nuser\nWho are you?\nassistant\nI am Qwen, a large language model created by Alibaba Cloud. I am designed to answer a wide range of questions and provide information on various topics", + 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', + 'system\nYou are a helpful assistant.\nuser\nWho are you?\nassistant\nI am Qwen, a large language model created by Alibaba Cloud. I am designed to assist with a wide range of tasks, from answering questions and' ], ("xpu", None): [ 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', @@ -731,6 +748,9 @@ def test_small_model_integration_test_with_video(self): (None, None): [ 'system\nYou are a helpful assistant.\nuser\nWhat is shown in this video?\nassistant\nThe video shows an indoor tennis court with a person standing on the service line, preparing to serve. The individual is wearing athletic attire, including a white', ], + ("rocm", (9, 4)): [ + 'system\nYou are a helpful assistant.\nuser\nWhat is shown in this video?\nassistant\nThe video shows an indoor tennis court with a person standing on the service line, preparing to serve. The individual appears to be practicing or warming up,', + ], ("xpu", None): [ 'system\nYou are a helpful assistant.\nuser\nWhat is shown in this video?\nassistant\nThe video shows an indoor tennis court with a person standing on the service line, preparing to serve. The individual appears to be practicing or warming up,', ], From 4a2142d21910498d2bdb0cde8c4f30c3b56b939a Mon Sep 17 00:00:00 2001 From: badaoui Date: Thu, 12 Mar 2026 12:56:38 +0000 Subject: [PATCH 6/7] style --- tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py b/tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py index 38730ed19c10..e3b3eda4ea12 100644 --- a/tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py +++ b/tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py @@ -498,11 +498,6 @@ def test_small_model_integration_test_batch(self): # it should not matter whether two images are the same size or not output = model.generate(**inputs, max_new_tokens=30, do_sample=False) - EXPECTED_DECODED_TEXT = [ - 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', - 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', - ] # fmt: skip - expected_decoded_texts = Expectations( { (None, None): [ @@ -567,7 +562,7 @@ def test_small_model_integration_test_batch_wo_image(self): 'system\nYou are a helpful assistant.\nuser\nWho are you?\nassistant\nI am Qwen, an AI language model created by Alibaba Cloud. I am designed to assist with various tasks such as answering questions, providing information,' ], ("rocm", (9, 4)): [ - 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and gentle nature, which is evident in', + 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and gentle nature, which is evident in', 'system\nYou are a helpful assistant.\nuser\nWho are you?\nassistant\nI am Qwen, a large language model created by Alibaba Cloud. I am designed to assist with a wide range of tasks, from answering questions and' ], ("xpu", None): [ @@ -611,7 +606,7 @@ def test_small_model_integration_test_batch_different_resolutions(self): 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and gentle nature, which is evident in', ], ("rocm", None): [ - 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and gentle nature, which is often reflected', + 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and gentle nature, which is often reflected', 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and gentle nature, which is evident in' ], ("xpu", None): [ @@ -691,7 +686,7 @@ def test_small_model_integration_test_batch_wo_image_flashatt2(self): "system\nYou are a helpful assistant.\nuser\nWho are you?\nassistant\n�\n\n addCriterion\nI'm sorry, but I don't understand your question. Could you please provide more context or clarify what you're asking", ], ("rocm", (9, 4)): [ - 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', + 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', 'system\nYou are a helpful assistant.\nuser\nWho are you?\nassistant\nI am Qwen, a large language model created by Alibaba Cloud. I am designed to assist with a wide range of tasks, from answering questions and' ], ("xpu", None): [ From 014f4b9a0a263847c69007acf6ccfdd1964ff546 Mon Sep 17 00:00:00 2001 From: badaoui Date: Thu, 12 Mar 2026 13:32:48 +0000 Subject: [PATCH 7/7] fix --- src/transformers/models/qwen2_audio/modeling_qwen2_audio.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/models/qwen2_audio/modeling_qwen2_audio.py b/src/transformers/models/qwen2_audio/modeling_qwen2_audio.py index 76f9e5995cc0..f3f94c933881 100644 --- a/src/transformers/models/qwen2_audio/modeling_qwen2_audio.py +++ b/src/transformers/models/qwen2_audio/modeling_qwen2_audio.py @@ -788,11 +788,11 @@ def forward( audio_features_mask = audio_features_mask < audio_output_lengths[:, None] audio_features = audio_features[audio_features_mask] - n_audio_tokens = (input_ids == self.config.audio_token_id).sum() + n_audio_tokens = (input_ids == self.config.audio_token_id).sum().item() n_audio_features = audio_features.shape[0] torch_compilable_check( n_audio_tokens == n_audio_features, - lambda: f"Audio features and audio tokens do not match, tokens: {n_audio_tokens}, features: {n_audio_features}", + f"Audio features and audio tokens do not match, tokens: {n_audio_tokens}, features: {n_audio_features}", ) special_audio_mask = (input_ids == self.config.audio_token_id).to(inputs_embeds.device) special_audio_mask = special_audio_mask.unsqueeze(-1).expand_as(inputs_embeds)