Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion dashscope/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
BatchTextEmbeddingResponse
from dashscope.embeddings.multimodal_embedding import (
MultiModalEmbedding, MultiModalEmbeddingItemAudio,
MultiModalEmbeddingItemImage, MultiModalEmbeddingItemText)
MultiModalEmbeddingItemImage, MultiModalEmbeddingItemText, AioMultiModalEmbedding)
from dashscope.embeddings.text_embedding import TextEmbedding
from dashscope.files import Files
from dashscope.models import Models
Expand Down Expand Up @@ -55,6 +55,7 @@
Models,
TextEmbedding,
MultiModalEmbedding,
AioMultiModalEmbedding,
MultiModalEmbeddingItemAudio,
MultiModalEmbeddingItemImage,
MultiModalEmbeddingItemText,
Expand Down
71 changes: 70 additions & 1 deletion dashscope/embeddings/multimodal_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from dashscope.api_entities.dashscope_response import (DashScopeAPIResponse,
DictMixin)
from dashscope.client.base_api import BaseApi
from dashscope.client.base_api import BaseApi, BaseAioApi
from dashscope.common.error import InputRequired, ModelRequired
from dashscope.common.utils import _get_task_group_and_task
from dashscope.utils.oss_utils import preprocess_message_element
Expand Down Expand Up @@ -111,3 +111,72 @@ def _preprocess_message_inputs(cls, model: str, input: List[dict],
if is_upload and not has_upload:
has_upload = True
return has_upload


class AioMultiModalEmbedding(BaseAioApi):
    """Async client for the DashScope multimodal embedding task."""
    task = 'multimodal-embedding'

    class Models:
        multimodal_embedding_one_peace_v1 = 'multimodal-embedding-one-peace-v1'

    @classmethod
    async def call(cls,
                   model: str,
                   input: List[dict],
                   api_key: str = None,
                   workspace: str = None,
                   **kwargs) -> DashScopeAPIResponse:
        """Get embeddings of multimodal contents.

        Args:
            model (str): The embedding model name.
            input (List[dict]): The embedding elements; every element
                includes data, modal, factor fields, e.g.
                ``{'factor': 1, 'text': 'hello'}``.
            api_key (str, `optional`): The DashScope api key.
            workspace (str): The dashscope workspace id.
            **kwargs:
                auto_truncation(bool, `optional`): Automatically truncate
                audio longer than 15 seconds or text longer than 70 words.
                Default to false(Too long input will result in failure).

        Raises:
            InputRequired: If ``input`` is None or empty.
            ModelRequired: If ``model`` is None or empty.

        Returns:
            DashScopeAPIResponse: The embedding result.
        """
        if input is None or not input:
            raise InputRequired('prompt is required!')
        if model is None or not model:
            raise ModelRequired('Model is required!')
        embedding_input = {}
        has_upload = cls._preprocess_message_inputs(model, input, api_key)
        if has_upload:
            # Uploaded OSS resources must be resolved server-side.
            headers = kwargs.pop('headers', {})
            headers['X-DashScope-OssResourceResolve'] = 'enable'
            kwargs['headers'] = headers
        embedding_input['contents'] = input
        kwargs.pop('stream', False)  # not support streaming output.
        task_group, function = _get_task_group_and_task(__name__)
        response = await super().call(
            model=model,
            input=embedding_input,
            task_group=task_group,
            # Use cls.task (not MultiModalEmbedding.task) so subclasses
            # inherit the correct task name.
            task=cls.task,
            function=function,
            api_key=api_key,
            workspace=workspace,
            **kwargs)
        return response

    @classmethod
    def _preprocess_message_inputs(cls, model: str, input: List[dict],
                                   api_key: str):
        """Upload any local file references in *input*.

        Delegates to the sync implementation to avoid duplicating the
        upload-detection logic.

        input = [{'factor': 1, 'text': 'hello'},
                 {'factor': 2, 'audio': ''},
                 {'factor': 3, 'image': ''}]

        Returns:
            bool: True if at least one element required an upload.
        """
        return MultiModalEmbedding._preprocess_message_inputs(
            model, input, api_key)
Comment on lines +168 to +182
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This method _preprocess_message_inputs is a duplicate of the one in the MultiModalEmbedding class. To improve maintainability and avoid code duplication, you should remove this method and call the existing one from MultiModalEmbedding within the AioMultiModalEmbedding.call method.

For example, in AioMultiModalEmbedding.call, you can change line 149 to:

has_upload = MultiModalEmbedding._preprocess_message_inputs(model, input, api_key)

50 changes: 50 additions & 0 deletions samples/test_multimodal_embedding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import asyncio

import dashscope
import json
from http import HTTPStatus
# In real use, replace this URL with your own image URL.
image = "https://dashscope.oss-cn-beijing.aliyuncs.com/images/256_1.png"

def test_multimodal_embedding():
input = [{'image': image}]
# 调用模型接口
resp = dashscope.MultiModalEmbedding.call(
model="multimodal-embedding-v1",
input=input
)

if resp.status_code == HTTPStatus.OK:
result = {
"status_code": resp.status_code,
"request_id": getattr(resp, "request_id", ""),
"code": getattr(resp, "code", ""),
"message": getattr(resp, "message", ""),
"output": resp.output,
"usage": resp.usage
}
print(json.dumps(result, ensure_ascii=False, indent=4))

async def test_aio_multimodal_embedding():
input = [{'image': image}]
# 调用模型接口
resp = await dashscope.AioMultiModalEmbedding.call(
model="multimodal-embedding-v1",
input=input
)

if resp.status_code == HTTPStatus.OK:
result = {
"status_code": resp.status_code,
"request_id": getattr(resp, "request_id", ""),
"code": getattr(resp, "code", ""),
"message": getattr(resp, "message", ""),
"output": resp.output,
"usage": resp.usage
}
print(json.dumps(result, ensure_ascii=False, indent=4))
Comment on lines +9 to +45
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The response handling logic in test_multimodal_embedding and test_aio_multimodal_embedding is identical. You can refactor this duplicated code into a helper function to improve readability and maintainability. For example:

def _print_response(resp):
    if resp.status_code == HTTPStatus.OK:
        result = {
            "status_code": resp.status_code,
            "request_id": getattr(resp, "request_id", ""),
            "code": getattr(resp, "code", ""),
            "message": getattr(resp, "message", ""),
            "output": resp.output,
            "usage": resp.usage
        }
        print(json.dumps(result, ensure_ascii=False, indent=4))

def test_multimodal_embedding():
    input = [{'image': image}]
    # 调用模型接口
    resp = dashscope.MultiModalEmbedding.call(
        model="multimodal-embedding-v1",
        input=input
    )
    _print_response(resp)

async def test_aio_multimodal_embedding():
    input = [{'image': image}]
    # 调用模型接口
    resp = await dashscope.AioMultiModalEmbedding.call(
        model="multimodal-embedding-v1",
        input=input
    )
    _print_response(resp)



# Sample entry point: runs the async variant by default.
if __name__ == "__main__":
    # Uncomment to run the synchronous sample instead:
    # test_multimodal_embedding()
    asyncio.run(test_aio_multimodal_embedding())