diff --git a/docker/transformers-pytorch-amd-gpu/Dockerfile b/docker/transformers-pytorch-amd-gpu/Dockerfile index ae75adb6807f..2c58491d686a 100644 --- a/docker/transformers-pytorch-amd-gpu/Dockerfile +++ b/docker/transformers-pytorch-amd-gpu/Dockerfile @@ -44,7 +44,7 @@ RUN git clone https://github.com/ROCm/flash-attention/ -b tridao && \ GPU_ARCHS="gfx942" python setup.py install # GPU_ARCHS builds for MI300, MI325 but not MI355: we would need to add `;gfx950` but it takes too long to build. -RUN python3 -m pip install --no-cache-dir einops +RUN python3 -m pip install --no-cache-dir einops blobfile num2words # timm is required for many vision models tests RUN python3 -m pip install --no-cache-dir timm diff --git a/src/transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py b/src/transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py index e2af2d1e5308..1e7b43d9bc44 100644 --- a/src/transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +++ b/src/transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py @@ -2404,9 +2404,9 @@ def prepare_inputs_for_generation( ): model_inputs = super().prepare_inputs_for_generation( input_ids, - past_key_values, - attention_mask, - inputs_embeds, + past_key_values=past_key_values, + attention_mask=attention_mask, + inputs_embeds=inputs_embeds, use_cache=use_cache, thinker_reply_part=thinker_reply_part, input_text_ids=input_text_ids, diff --git a/src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py b/src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py index c27da0c1a478..833931d1bee4 100644 --- a/src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +++ b/src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py @@ -2238,9 +2238,9 @@ def prepare_inputs_for_generation( ): model_inputs = super().prepare_inputs_for_generation( input_ids, - past_key_values, - attention_mask, - inputs_embeds, + past_key_values=past_key_values, + attention_mask=attention_mask, + inputs_embeds=inputs_embeds, use_cache=use_cache, thinker_reply_part=thinker_reply_part, input_text_ids=input_text_ids, diff --git a/tests/models/llava/test_modeling_llava.py b/tests/models/llava/test_modeling_llava.py index b7f18b597d6a..b0bcf5afbbbd 100644 --- a/tests/models/llava/test_modeling_llava.py +++ b/tests/models/llava/test_modeling_llava.py @@ -579,7 +579,7 @@ def test_tokenizer_integration(self): fast_tokenizer.add_tokens("", True) prompt = "<|im_start|>system\nAnswer the questions.<|im_end|><|im_start|>user\n\nWhat is shown in this image?<|im_end|><|im_start|>assistant\n" - EXPECTED_OUTPUT = ['<|im_start|>', 'sy', 'st', 'em', '\n', 'An', 'sw', 'er', ' ', 'the', ' ', 'qu', 'est', 'ions', '.', '<|im_end|>', '<|im_start|>', 'us', 'er', '\n', '', '\n', 'What', ' ', 'is', ' ', 'sh', 'own', ' ', 'in', ' ', 'th', 'is', ' ', 'im', 'age', '?', '<|im_end|>', '<|im_start|>', 'ass', 'ist', 'ant', '\n'] # fmt: skip + EXPECTED_OUTPUT = ['<|im_start|>', 'system', '\n', 'Answer', '▁the', '▁questions', '.', '<|im_end|>', '<|im_start|>', 'user', '\n', '', '\n', 'What', '▁is', '▁shown', '▁in', '▁this', '▁image', '?', '<|im_end|>', '<|im_start|>', 'ass', 'istant', '\n'] # fmt: skip self.assertEqual(slow_tokenizer.tokenize(prompt), EXPECTED_OUTPUT) self.assertEqual(fast_tokenizer.tokenize(prompt), EXPECTED_OUTPUT) diff --git a/tests/models/qwen2/test_modeling_qwen2.py b/tests/models/qwen2/test_modeling_qwen2.py index 3e77d60e8726..a2a00b5fe52a 100644 --- a/tests/models/qwen2/test_modeling_qwen2.py +++ b/tests/models/qwen2/test_modeling_qwen2.py @@ -183,11 +183,9 @@ def test_model_450m_long_prompt_sdpa(self): @slow def test_speculative_generation(self): - EXPECTED_TEXT_COMPLETION = Expectations({ - (None, None): "My favourite condiment is 100% natural and organic, and I love to use it to make my own sauces.", - ("xpu", 3): "My favourite condiment is 100% natural, organic, gluten-free, vegan, and vegetarian. I have been making", - }) # fmt: off - + EXPECTED_TEXT_COMPLETION = ( + "My favourite condiment is 100% natural, organic, gluten-free, vegan, and vegetarian. I have been making" + ) prompt = "My favourite condiment is " tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-7B", use_fast=False) model = Qwen2ForCausalLM.from_pretrained("Qwen/Qwen2-0.5B", device_map="auto", dtype=torch.float16) diff --git a/tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py b/tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py index 4b5ee6d21323..fd626a60507e 100644 --- a/tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py +++ b/tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py @@ -508,14 +508,22 @@ def test_small_model_integration_test_batch(self): # it should not matter whether two images are the same size or not output = model.generate(**inputs, max_new_tokens=30, do_sample=False) - EXPECTED_DECODED_TEXT = [ - 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', - 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', - ] # fmt: skip + expected_decoded_texts = Expectations( + { + (None, None): [ + 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', + 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', + ], + ("rocm", (9, 4)): [ + 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and gentle nature, which is often reflected', + 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and gentle nature, which is often reflected' + ], + } + ).get_expectation() # fmt: skip self.assertEqual( self.processor.batch_decode(output, skip_special_tokens=True), - EXPECTED_DECODED_TEXT, + expected_decoded_texts, ) @slow @@ -563,6 +571,10 @@ def test_small_model_integration_test_batch_wo_image(self): 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', 'system\nYou are a helpful assistant.\nuser\nWho are you?\nassistant\nI am Qwen, an AI language model created by Alibaba Cloud. I am designed to assist with various tasks such as answering questions, providing information,' ], + ("rocm", (9, 4)): [ + 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and gentle nature, which is evident in', + 'system\nYou are a helpful assistant.\nuser\nWho are you?\nassistant\nI am Qwen, a large language model created by Alibaba Cloud. I am designed to assist with a wide range of tasks, from answering questions and' + ], ("xpu", None): [ 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', 'system\nYou are a helpful assistant.\nuser\nWho are you?\nassistant\nI am Qwen, a large language model created by Alibaba Cloud. I am designed to assist with a wide range of tasks, from answering questions and' @@ -604,8 +616,8 @@ def test_small_model_integration_test_batch_different_resolutions(self): 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and gentle nature, which is evident in', ], ("rocm", None): [ - 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', - 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\n addCriterion\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and gentle nature, which is', + 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and gentle nature, which is often reflected', + 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and gentle nature, which is evident in' ], ("xpu", None): [ 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', @@ -643,7 +655,7 @@ def test_small_model_integration_test_batch_flashatt2(self): expected_decoded_text = Expectations({ ("cuda", None): "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in", - ("rocm", (9, 4)): "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in", + ("rocm", (9, 4)): "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and gentle nature, which is evident in", ("xpu", None): "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in", }).get_expectation() # fmt: skip @@ -685,7 +697,7 @@ def test_small_model_integration_test_batch_wo_image_flashatt2(self): ], ("rocm", (9, 4)): [ 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', - "system\nYou are a helpful assistant.\nuser\nWho are you?\nassistant\nI am Qwen, a large language model created by Alibaba Cloud. I am designed to answer a wide range of questions and provide information on various topics", + 'system\nYou are a helpful assistant.\nuser\nWho are you?\nassistant\nI am Qwen, a large language model created by Alibaba Cloud. I am designed to assist with a wide range of tasks, from answering questions and' ], ("xpu", None): [ 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', @@ -741,6 +753,9 @@ def test_small_model_integration_test_with_video(self): (None, None): [ 'system\nYou are a helpful assistant.\nuser\nWhat is shown in this video?\nassistant\nThe video shows an indoor tennis court with a person standing on the service line, preparing to serve. The individual is wearing athletic attire, including a white', ], + ("rocm", (9, 4)): [ + 'system\nYou are a helpful assistant.\nuser\nWhat is shown in this video?\nassistant\nThe video shows an indoor tennis court with a person standing on the service line, preparing to serve. The individual appears to be practicing or warming up,', + ], ("xpu", None): [ 'system\nYou are a helpful assistant.\nuser\nWhat is shown in this video?\nassistant\nThe video shows an indoor tennis court with a person standing on the service line, preparing to serve. The individual appears to be practicing or warming up,', ],