Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion dashscope/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
BatchTextEmbeddingResponse
from dashscope.embeddings.multimodal_embedding import (
MultiModalEmbedding, MultiModalEmbeddingItemAudio,
MultiModalEmbeddingItemImage, MultiModalEmbeddingItemText)
MultiModalEmbeddingItemImage, MultiModalEmbeddingItemText, AioMultiModalEmbedding)
from dashscope.embeddings.text_embedding import TextEmbedding
from dashscope.files import Files
from dashscope.models import Models
Expand Down Expand Up @@ -55,6 +55,7 @@
Models,
TextEmbedding,
MultiModalEmbedding,
AioMultiModalEmbedding,
MultiModalEmbeddingItemAudio,
MultiModalEmbeddingItemImage,
MultiModalEmbeddingItemText,
Expand Down
71 changes: 70 additions & 1 deletion dashscope/embeddings/multimodal_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from dashscope.api_entities.dashscope_response import (DashScopeAPIResponse,
DictMixin)
from dashscope.client.base_api import BaseApi
from dashscope.client.base_api import BaseApi, BaseAioApi
from dashscope.common.error import InputRequired, ModelRequired
from dashscope.common.utils import _get_task_group_and_task
from dashscope.utils.oss_utils import preprocess_message_element
Expand Down Expand Up @@ -111,3 +111,72 @@ def _preprocess_message_inputs(cls, model: str, input: List[dict],
if is_upload and not has_upload:
has_upload = True
return has_upload


class AioMultiModalEmbedding(BaseAioApi):
    """Async client for the DashScope multimodal embedding task."""
    task = 'multimodal-embedding'

    class Models:
        multimodal_embedding_one_peace_v1 = 'multimodal-embedding-one-peace-v1'

    @classmethod
    async def call(cls,
                   model: str,
                   input: List[dict],
                   api_key: str = None,
                   workspace: str = None,
                   **kwargs) -> DashScopeAPIResponse:
        """Get embeddings of multimodal contents.

        Args:
            model (str): The embedding model name.
            input (List[dict]): The embedding elements; every element
                includes data, modal, factor fields, e.g.
                ``{'factor': 1, 'text': 'hello'}``.
            api_key (str, `optional`): The DashScope api key.
            workspace (str): The dashscope workspace id.
            **kwargs:
                auto_truncation(bool, `optional`): Automatically truncate
                audio longer than 15 seconds or text longer than 70 words.
                Default to false(Too long input will result in failure).

        Raises:
            InputRequired: If ``input`` is None or empty.
            ModelRequired: If ``model`` is None or empty.

        Returns:
            DashScopeAPIResponse: The embedding result.
        """
        if input is None or not input:
            raise InputRequired('prompt is required!')
        if model is None or not model:
            raise ModelRequired('Model is required!')
        embedding_input = {}
        has_upload = cls._preprocess_message_inputs(model, input, api_key)
        if has_upload:
            # Uploaded OSS resources must be resolved server-side.
            headers = kwargs.pop('headers', {})
            headers['X-DashScope-OssResourceResolve'] = 'enable'
            kwargs['headers'] = headers
        embedding_input['contents'] = input
        kwargs.pop('stream', False)  # not support streaming output.
        task_group, function = _get_task_group_and_task(__name__)
        response = await super().call(
            model=model,
            input=embedding_input,
            task_group=task_group,
            # Use cls.task (not MultiModalEmbedding.task) so subclasses
            # inherit the correct task name.
            task=cls.task,
            function=function,
            api_key=api_key,
            workspace=workspace,
            **kwargs)
        return response

    @classmethod
    def _preprocess_message_inputs(cls, model: str, input: List[dict],
                                   api_key: str):
        """Upload any local file references in *input*.

        Delegates to the sync implementation to avoid duplicating the
        upload-detection logic.

        input = [{'factor': 1, 'text': 'hello'},
                 {'factor': 2, 'audio': ''},
                 {'factor': 3, 'image': ''}]

        Returns:
            bool: True if at least one element required an upload.
        """
        return MultiModalEmbedding._preprocess_message_inputs(
            model, input, api_key)
Comment on lines +168 to +182
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This method _preprocess_message_inputs is a duplicate of the one in the MultiModalEmbedding class. To improve maintainability and avoid code duplication, you should remove this method and call the existing one from MultiModalEmbedding within the AioMultiModalEmbedding.call method.

For example, in AioMultiModalEmbedding.call, you can change line 149 to:

has_upload = MultiModalEmbedding._preprocess_message_inputs(model, input, api_key)

50 changes: 50 additions & 0 deletions samples/test_multimodal_embedding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import asyncio

import dashscope
import json
from http import HTTPStatus
# In real use, replace this URL with your own image URL.
image = "https://dashscope.oss-cn-beijing.aliyuncs.com/images/256_1.png"

def test_multimodal_embedding():
input = [{'image': image}]
# 调用模型接口
resp = dashscope.MultiModalEmbedding.call(
model="multimodal-embedding-v1",
input=input
)

if resp.status_code == HTTPStatus.OK:
result = {
"status_code": resp.status_code,
"request_id": getattr(resp, "request_id", ""),
"code": getattr(resp, "code", ""),
"message": getattr(resp, "message", ""),
"output": resp.output,
"usage": resp.usage
}
print(json.dumps(result, ensure_ascii=False, indent=4))

async def test_aio_multimodal_embedding():
input = [{'image': image}]
# 调用模型接口
resp = await dashscope.AioMultiModalEmbedding.call(
model="multimodal-embedding-v1",
input=input
)

if resp.status_code == HTTPStatus.OK:
result = {
"status_code": resp.status_code,
"request_id": getattr(resp, "request_id", ""),
"code": getattr(resp, "code", ""),
"message": getattr(resp, "message", ""),
"output": resp.output,
"usage": resp.usage
}
print(json.dumps(result, ensure_ascii=False, indent=4))
Comment on lines +9 to +45
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The response handling logic in test_multimodal_embedding and test_aio_multimodal_embedding is identical. You can refactor this duplicated code into a helper function to improve readability and maintainability. For example:

def _print_response(resp):
    if resp.status_code == HTTPStatus.OK:
        result = {
            "status_code": resp.status_code,
            "request_id": getattr(resp, "request_id", ""),
            "code": getattr(resp, "code", ""),
            "message": getattr(resp, "message", ""),
            "output": resp.output,
            "usage": resp.usage
        }
        print(json.dumps(result, ensure_ascii=False, indent=4))

def test_multimodal_embedding():
    input = [{'image': image}]
    # 调用模型接口
    resp = dashscope.MultiModalEmbedding.call(
        model="multimodal-embedding-v1",
        input=input
    )
    _print_response(resp)

async def test_aio_multimodal_embedding():
    input = [{'image': image}]
    # 调用模型接口
    resp = await dashscope.AioMultiModalEmbedding.call(
        model="multimodal-embedding-v1",
        input=input
    )
    _print_response(resp)



# Sample entry point: runs the async variant by default.
if __name__ == "__main__":
    # Uncomment to run the synchronous sample instead:
    # test_multimodal_embedding()
    asyncio.run(test_aio_multimodal_embedding())