diff --git a/src/transformers/models/qwen2_5_omni/processing_qwen2_5_omni.py b/src/transformers/models/qwen2_5_omni/processing_qwen2_5_omni.py
index dcc98856ddc2..5f5b6584862a 100644
--- a/src/transformers/models/qwen2_5_omni/processing_qwen2_5_omni.py
+++ b/src/transformers/models/qwen2_5_omni/processing_qwen2_5_omni.py
@@ -16,7 +16,6 @@
 Processor class for Qwen2.5Omni.
 """
 
-import logging
 import re
 
 import numpy as np
@@ -301,27 +300,6 @@ def _iter():
 
         return list(_iter())
 
-    def apply_chat_template(self, conversations, chat_template=None, **kwargs):
-        is_batched = False
-        if isinstance(conversations[0], dict):
-            conversations = [conversations]
-            is_batched = True
-
-        for conversation in conversations:
-            if (
-                conversation[0]["role"] != "system"
-                or conversation[0]["content"][0]["text"]
-                != "You are Qwen, a virtual human developed by the Qwen Team, Alibaba Group, capable of perceiving auditory and visual inputs, as well as generating text and speech."
-            ):
-                logging.warning(
-                    "System prompt modified, audio output may not work as expected. "
-                    + "Audio output mode only works when using default system prompt 'You are Qwen, a virtual human developed by the Qwen Team, Alibaba Group, capable of perceiving auditory and visual inputs, as well as generating text and speech.'"
-                )
-        if is_batched:
-            conversations = conversations[0]
-
-        return super().apply_chat_template(conversations, chat_template, **kwargs)
-
     def post_process_image_text_to_text(self, generated_outputs, skip_special_tokens=True, **kwargs):
         """
         Post-process the output of a vlm to decode the text.
diff --git a/src/transformers/models/qwen3_omni_moe/processing_qwen3_omni_moe.py b/src/transformers/models/qwen3_omni_moe/processing_qwen3_omni_moe.py
index 9ab134377829..f8fa23ee31ba 100644
--- a/src/transformers/models/qwen3_omni_moe/processing_qwen3_omni_moe.py
+++ b/src/transformers/models/qwen3_omni_moe/processing_qwen3_omni_moe.py
@@ -318,9 +318,6 @@ def _iter():
 
         return list(_iter())
 
-    def apply_chat_template(self, conversations, chat_template=None, **kwargs):
-        return super().apply_chat_template(conversations, chat_template, **kwargs)
-
     def post_process_image_text_to_text(self, generated_outputs, skip_special_tokens=True, **kwargs):
         """
         Post-process the output of a vlm to decode the text.
@@ -392,5 +389,8 @@ def model_input_names(self):
             )
         )
 
+    def apply_chat_template(self, conversations, chat_template=None, **kwargs):
+        return super().apply_chat_template(conversations, chat_template, **kwargs)
+
 
 __all__ = ["Qwen3OmniMoeProcessor"]