diff --git a/dashscope/aigc/image_synthesis.py b/dashscope/aigc/image_synthesis.py index a699523..8d6d245 100644 --- a/dashscope/aigc/image_synthesis.py +++ b/dashscope/aigc/image_synthesis.py @@ -136,26 +136,28 @@ def _get_input(cls, raise InputRequired('prompt is required!') inputs = {PROMPT: prompt} has_upload = False + upload_certificate = None + if negative_prompt is not None: inputs[NEGATIVE_PROMPT] = negative_prompt if images is not None and images and len(images) > 0: new_images = [] for image in images: - is_upload, new_image = check_and_upload_local( - model, image, api_key) + is_upload, new_image, upload_certificate = check_and_upload_local( + model, image, api_key, upload_certificate) if is_upload: has_upload = True new_images.append(new_image) inputs[IMAGES] = new_images if sketch_image_url is not None and sketch_image_url: - is_upload, sketch_image_url = check_and_upload_local( - model, sketch_image_url, api_key) + is_upload, sketch_image_url, upload_certificate = check_and_upload_local( + model, sketch_image_url, api_key, upload_certificate) if is_upload: has_upload = True inputs['sketch_image_url'] = sketch_image_url if ref_img is not None and ref_img: - is_upload, ref_img = check_and_upload_local( - model, ref_img, api_key) + is_upload, ref_img, upload_certificate = check_and_upload_local( + model, ref_img, api_key, upload_certificate) if is_upload: has_upload = True inputs['ref_img'] = ref_img @@ -164,15 +166,15 @@ def _get_input(cls, inputs['function'] = function if mask_image_url is not None and mask_image_url: - is_upload, res_mask_image_url = check_and_upload_local( - model, mask_image_url, api_key) + is_upload, res_mask_image_url, upload_certificate = check_and_upload_local( + model, mask_image_url, api_key, upload_certificate) if is_upload: has_upload = True inputs['mask_image_url'] = res_mask_image_url if base_image_url is not None and base_image_url: - is_upload, res_base_image_url = check_and_upload_local( - model, base_image_url, api_key) + is_upload, res_base_image_url, upload_certificate = check_and_upload_local( + model, base_image_url, api_key, upload_certificate) if is_upload: has_upload = True inputs['base_image_url'] = res_base_image_url diff --git a/dashscope/aigc/multimodal_conversation.py b/dashscope/aigc/multimodal_conversation.py index 361b1f8..6d82115 100644 --- a/dashscope/aigc/multimodal_conversation.py +++ b/dashscope/aigc/multimodal_conversation.py @@ -160,13 +160,15 @@ def _preprocess_messages(cls, model: str, messages: List[dict], ] """ has_upload = False + upload_certificate = None + for message in messages: content = message['content'] for elem in content: if not isinstance(elem, (int, float, bool, str, bytes, bytearray)): - is_upload = preprocess_message_element( - model, elem, api_key) + is_upload, upload_certificate = preprocess_message_element( + model, elem, api_key, upload_certificate) if is_upload and not has_upload: has_upload = True return has_upload @@ -335,13 +337,15 @@ def _preprocess_messages(cls, model: str, messages: List[dict], ] """ has_upload = False + upload_certificate = None + for message in messages: content = message['content'] for elem in content: if not isinstance(elem, (int, float, bool, str, bytes, bytearray)): - is_upload = preprocess_message_element( - model, elem, api_key) + is_upload, upload_certificate = preprocess_message_element( + model, elem, api_key, upload_certificate) if is_upload and not has_upload: has_upload = True return has_upload diff --git a/dashscope/aigc/video_synthesis.py b/dashscope/aigc/video_synthesis.py index 3fb6c56..e18cd70 100644 --- a/dashscope/aigc/video_synthesis.py +++ b/dashscope/aigc/video_synthesis.py @@ -121,45 +121,46 @@ def _get_input(cls, inputs['function'] = function has_upload = False + upload_certificate = None if img_url is not None and img_url: - is_upload, res_img_url = check_and_upload_local( - model, img_url, api_key) + is_upload, res_img_url, upload_certificate = check_and_upload_local( + model, img_url, api_key, upload_certificate) if is_upload: has_upload = True inputs['img_url'] = res_img_url if audio_url is not None and audio_url: - is_upload, res_audio_url = check_and_upload_local( - model, audio_url, api_key) + is_upload, res_audio_url, upload_certificate = check_and_upload_local( + model, audio_url, api_key, upload_certificate) if is_upload: has_upload = True inputs['audio_url'] = res_audio_url if head_frame is not None and head_frame: - is_upload, res_head_frame = check_and_upload_local( - model, head_frame, api_key) + is_upload, res_head_frame, upload_certificate = check_and_upload_local( + model, head_frame, api_key, upload_certificate) if is_upload: has_upload = True inputs['head_frame'] = res_head_frame if tail_frame is not None and tail_frame: - is_upload, res_tail_frame = check_and_upload_local( - model, tail_frame, api_key) + is_upload, res_tail_frame, upload_certificate = check_and_upload_local( + model, tail_frame, api_key, upload_certificate) if is_upload: has_upload = True inputs['tail_frame'] = res_tail_frame if first_frame_url is not None and first_frame_url: - is_upload, res_first_frame_url = check_and_upload_local( - model, first_frame_url, api_key) + is_upload, res_first_frame_url, upload_certificate = check_and_upload_local( + model, first_frame_url, api_key, upload_certificate) if is_upload: has_upload = True inputs['first_frame_url'] = res_first_frame_url if last_frame_url is not None and last_frame_url: - is_upload, res_last_frame_url = check_and_upload_local( - model, last_frame_url, api_key) + is_upload, res_last_frame_url, upload_certificate = check_and_upload_local( + model, last_frame_url, api_key, upload_certificate) if is_upload: has_upload = True inputs['last_frame_url'] = res_last_frame_url diff --git a/dashscope/cli.py b/dashscope/cli.py index e333af6..9e7c47e 100644 --- a/dashscope/cli.py +++ b/dashscope/cli.py @@ -215,10 +215,10 @@ def upload(cls, args): 'DASHSCOPE_API_KEY or pass it as argument by -k/--api_key') return - oss_url = OssUtils.upload(model=args.model, - file_path=file_path, - api_key=api_key, - base_address=args.base_url) + oss_url, _ = OssUtils.upload(model=args.model, + file_path=file_path, + api_key=api_key, + base_address=args.base_url) if not oss_url: print('Failed to upload file: %s' % file_path) diff --git a/dashscope/embeddings/multimodal_embedding.py b/dashscope/embeddings/multimodal_embedding.py index e77dedf..8d9b414 100644 --- a/dashscope/embeddings/multimodal_embedding.py +++ b/dashscope/embeddings/multimodal_embedding.py @@ -105,9 +105,11 @@ def _preprocess_message_inputs(cls, model: str, input: List[dict], {'factor': 3, 'image': ''}] """ has_upload = False + upload_certificate = None for elem in input: if not isinstance(elem, (int, float, bool, str, bytes, bytearray)): - is_upload = preprocess_message_element(model, elem, api_key) + is_upload, upload_certificate = preprocess_message_element( + model, elem, api_key, upload_certificate) if is_upload and not has_upload: has_upload = True return has_upload @@ -174,9 +176,11 @@ def _preprocess_message_inputs(cls, model: str, input: List[dict], {'factor': 3, 'image': ''}] """ has_upload = False + upload_certificate = None for elem in input: if not isinstance(elem, (int, float, bool, str, bytes, bytearray)): - is_upload = preprocess_message_element(model, elem, api_key) + is_upload, upload_certificate = preprocess_message_element( + model, elem, api_key, upload_certificate) if is_upload and not has_upload: has_upload = True return has_upload diff --git a/dashscope/utils/oss_utils.py b/dashscope/utils/oss_utils.py index 54eb12f..a99d993 100644 --- a/dashscope/utils/oss_utils.py +++ b/dashscope/utils/oss_utils.py @@ -35,7 +35,8 @@ def upload(cls, model: str, file_path: str, api_key: str = None, - **kwargs) -> DashScopeAPIResponse: + upload_certificate: dict = None, + **kwargs): """Upload file for model fine-tune or other tasks. Args: @@ -43,16 +44,24 @@ def upload(cls, purpose (str): The purpose of the file[fine-tune|inference] description (str, optional): The file description message. api_key (str, optional): The api key. Defaults to None. + upload_certificate (dict, optional): Reusable upload + certificate. Defaults to None. Returns: - DashScopeAPIResponse: The upload information + tuple: (file_url, upload_certificate) where file_url is the + OSS URL and upload_certificate is the certificate used """ - upload_info = cls.get_upload_certificate(model=model, api_key=api_key, **kwargs) - if upload_info.status_code != HTTPStatus.OK: - raise UploadFileException( - 'Get upload certificate failed, code: %s, message: %s' % - (upload_info.code, upload_info.message)) - upload_info = upload_info.output + if upload_certificate is None: + upload_info = cls.get_upload_certificate(model=model, + api_key=api_key, + **kwargs) + if upload_info.status_code != HTTPStatus.OK: + raise UploadFileException( + 'Get upload certificate failed, code: %s, message: %s' % + (upload_info.code, upload_info.message)) + upload_info = upload_info.output + else: + upload_info = upload_certificate headers = {} headers = {'user-agent': get_user_agent()} headers['Accept'] = 'application/json' @@ -77,7 +86,7 @@ def upload(cls, headers=headers, timeout=3600) if response.status_code == HTTPStatus.OK: - return 'oss://' + form_data['key'] + return 'oss://' + form_data['key'], upload_info else: msg = ( 'Uploading file: %s to oss failed, error: %s' % @@ -103,7 +112,8 @@ def get_upload_certificate(cls, return super().get(None, api_key, params=params, **kwargs) -def upload_file(model: str, upload_path: str, api_key: str): +def upload_file(model: str, upload_path: str, api_key: str, + upload_certificate: dict = None): if upload_path.startswith(FILE_PATH_SCHEMA): parse_result = urlparse(upload_path) if parse_result.netloc: @@ -111,9 +121,10 @@ def upload_file(model: str, upload_path: str, api_key: str): else: file_path = unquote_plus(parse_result.path) if os.path.exists(file_path): - file_url = OssUtils.upload(model=model, - file_path=file_path, - api_key=api_key) + file_url, _ = OssUtils.upload(model=model, + file_path=file_path, + api_key=api_key, + upload_certificate=upload_certificate) if file_url is None: raise UploadFileException('Uploading file: %s failed' % upload_path) @@ -123,20 +134,26 @@ def upload_file(model: str, upload_path: str, api_key: str): return None -def check_and_upload_local(model: str, content: str, api_key: str): +def check_and_upload_local(model: str, content: str, api_key: str, + upload_certificate: dict = None): """Check the content is local file path, upload and return the url Args: model (str): Which model to upload. content (str): The content. api_key (_type_): The api key. + upload_certificate (dict, optional): Reusable upload certificate. + Defaults to None. Raises: UploadFileException: Upload failed. InvalidInput: The input is invalid Returns: - _type_: if upload return True and file_url otherwise False, origin content. + tuple: (is_upload, file_url_or_content, upload_certificate) + where is_upload indicates if file was uploaded, file_url_or_content + is the result URL or original content, and upload_certificate + is the certificate (newly obtained or passed in) """ if content.startswith(FILE_PATH_SCHEMA): parse_result = urlparse(content) @@ -145,32 +162,50 @@ def check_and_upload_local(model: str, content: str, api_key: str): else: file_path = unquote_plus(parse_result.path) if os.path.isfile(file_path): - file_url = OssUtils.upload(model=model, - file_path=file_path, - api_key=api_key) + file_url, cert = OssUtils.upload(model=model, + file_path=file_path, + api_key=api_key, + upload_certificate=upload_certificate) if file_url is None: raise UploadFileException('Uploading file: %s failed' % content) - return True, file_url + return True, file_url, cert else: raise InvalidInput('The file: %s is not exists!' % file_path) elif content.startswith('oss://'): - return True, content + return True, content, upload_certificate elif not content.startswith('http'): content = os.path.expanduser(content) if os.path.isfile(content): - file_url = OssUtils.upload(model=model, - file_path=content, - api_key=api_key) + file_url, cert = OssUtils.upload(model=model, + file_path=content, + api_key=api_key, + upload_certificate=upload_certificate) if file_url is None: raise UploadFileException('Uploading file: %s failed' % content) - return True, file_url - return False, content + return True, file_url, cert + return False, content, upload_certificate + + +def check_and_upload(model, elem: dict, api_key, + upload_certificate: dict = None): + """Check and upload files in element. + Args: + model: Model name + elem: Element dict containing file references + api_key: API key + upload_certificate: Optional upload certificate to reuse -def check_and_upload(model, elem: dict, api_key): + Returns: + tuple: (has_upload, upload_certificate) where has_upload is bool + indicating if any file was uploaded, and upload_certificate + is the certificate (newly obtained or passed in) + """ has_upload = False + obtained_certificate = upload_certificate + for key, content in elem.items(): # support video:[images] for qwen2-vl is_list = isinstance(content, list) @@ -178,16 +213,31 @@ def check_and_upload(model, elem: dict, api_key): if key in ['image', 'video', 'audio', 'text']: for i, content in enumerate(contents): - is_upload, file_url = check_and_upload_local( - model, content, api_key) + is_upload, file_url, obtained_certificate = check_and_upload_local( + model, content, api_key, obtained_certificate) if is_upload: contents[i] = file_url has_upload = True elem[key] = contents if is_list else contents[0] - return has_upload + return has_upload, obtained_certificate + + +def preprocess_message_element(model: str, elem: dict, api_key: str, + upload_certificate: dict = None): + """Preprocess message element and upload files if needed. + Args: + model: Model name + elem: Element dict containing file references + api_key: API key + upload_certificate: Optional upload certificate to reuse -def preprocess_message_element(model: str, elem: dict, api_key: str): - is_upload = check_and_upload(model, elem, api_key) - return is_upload + Returns: + tuple: (is_upload, upload_certificate) where is_upload is bool + indicating if any file was uploaded, and upload_certificate + is the certificate (newly obtained or passed in) + """ + is_upload, cert = check_and_upload(model, elem, api_key, + upload_certificate) + return is_upload, cert diff --git a/samples/test_multimodal_conversation.py b/samples/test_multimodal_conversation.py index 8c4c805..d77048e 100644 --- a/samples/test_multimodal_conversation.py +++ b/samples/test_multimodal_conversation.py @@ -40,6 +40,33 @@ def test_vl_model(): for chunk in response: print(chunk) + @staticmethod + def test_vl_model_with_video(): + """Test MultiModalConversation API with image and text input.""" + # Prepare test messages with image and text + messages = [{"role": "user", + "content": [ + {"video": [ + "/Users/zhiyi/Downloads/vl_data/1.jpg", + "/Users/zhiyi/Downloads/vl_data/2.jpg", + "/Users/zhiyi/Downloads/vl_data/3.jpg", + "/Users/zhiyi/Downloads/vl_data/4.jpg", + ]}, + {"text": "描述这个视频的具体过程"}]}] + + # Call MultiModalConversation API with encryption enabled + response = dashscope.MultiModalConversation.call( + api_key=os.getenv('DASHSCOPE_API_KEY'), + model='qwen-vl-max-latest', + messages=messages, + incremental_output=True, + stream=True, + ) + + print("\n") + for chunk in response: + print(chunk) + @staticmethod def test_vl_model_with_tool_calls(): tools = [ @@ -222,7 +249,8 @@ def test_omni(): if __name__ == "__main__": # TestMultiModalConversation.test_vl_model() - TestMultiModalConversation.test_vl_model_with_tool_calls() + TestMultiModalConversation.test_vl_model_with_video() + # TestMultiModalConversation.test_vl_model_with_tool_calls() # TestMultiModalConversation.test_vl_model_with_reasoning_content() # TestMultiModalConversation.test_vl_ocr() # TestMultiModalConversation.test_qwen_asr()