From fb2a2a63f253be924d66f105d4ea192d8d4a3884 Mon Sep 17 00:00:00 2001 From: ccsang Date: Sun, 15 Mar 2026 16:09:17 +0000 Subject: [PATCH 1/2] fix(agent): process all content items in CallToolResult, not just the first Fixes #6140 When a tool returns CallToolResult with multiple content items (e.g., both TextContent and ImageContent), the agent was only processing content[0], ignoring the rest. Changes: - Replace direct content[0] access with enumerate(res.content) loop - Process all content items: TextContent, ImageContent, EmbeddedResource - Use content_index for image caching to distinguish multiple images This fixes the issue where tools like Bilibili plugin return both text descriptions and screenshots, but LLM only received one of them. --- .../agent/runners/tool_loop_agent_runner.py | 90 ++++++++++--------- 1 file changed, 46 insertions(+), 44 deletions(-) diff --git a/astrbot/core/agent/runners/tool_loop_agent_runner.py b/astrbot/core/agent/runners/tool_loop_agent_runner.py index 743b280070..4d99f98049 100644 --- a/astrbot/core/agent/runners/tool_loop_agent_runner.py +++ b/astrbot/core/agent/runners/tool_loop_agent_runner.py @@ -758,51 +758,21 @@ def _append_tool_call_result(tool_call_id: str, content: str) -> None: if isinstance(resp, CallToolResult): res = resp _final_resp = resp - if isinstance(res.content[0], TextContent): - _append_tool_call_result( - func_tool_id, - res.content[0].text, - ) - elif isinstance(res.content[0], ImageContent): - # Cache the image instead of sending directly - cached_img = tool_image_cache.save_image( - base64_data=res.content[0].data, - tool_call_id=func_tool_id, - tool_name=func_tool_name, - index=0, - mime_type=res.content[0].mimeType or "image/png", - ) - _append_tool_call_result( - func_tool_id, - ( - f"Image returned and cached at path='{cached_img.file_path}'. " - f"Review the image below. Use send_message_to_user to send it to the user if satisfied, " - f"with type='image' and path='{cached_img.file_path}'." - ), - ) - # Yield image info for LLM visibility (will be handled in step()) - yield _HandleFunctionToolsResult.from_cached_image( - cached_img - ) - elif isinstance(res.content[0], EmbeddedResource): - resource = res.content[0].resource - if isinstance(resource, TextResourceContents): + # Process all content items in the result + for content_index, content in enumerate(res.content): + if isinstance(content, TextContent): _append_tool_call_result( func_tool_id, - resource.text, + content.text, ) - elif ( - isinstance(resource, BlobResourceContents) - and resource.mimeType - and resource.mimeType.startswith("image/") - ): + elif isinstance(content, ImageContent): # Cache the image instead of sending directly cached_img = tool_image_cache.save_image( - base64_data=resource.blob, + base64_data=content.data, tool_call_id=func_tool_id, tool_name=func_tool_name, - index=0, - mime_type=resource.mimeType, + index=content_index, + mime_type=content.mimeType or "image/png", ) _append_tool_call_result( func_tool_id, @@ -812,15 +782,47 @@ def _append_tool_call_result(tool_call_id: str, content: str) -> None: f"with type='image' and path='{cached_img.file_path}'." ), ) - # Yield image info for LLM visibility + # Yield image info for LLM visibility (will be handled in step()) yield _HandleFunctionToolsResult.from_cached_image( cached_img ) - else: - _append_tool_call_result( - func_tool_id, - "The tool has returned a data type that is not supported.", - ) + elif isinstance(content, EmbeddedResource): + resource = content.resource + if isinstance(resource, TextResourceContents): + _append_tool_call_result( + func_tool_id, + resource.text, + ) + elif ( + isinstance(resource, BlobResourceContents) + and resource.mimeType + and resource.mimeType.startswith("image/") + ): + # Cache the image instead of sending directly + cached_img = tool_image_cache.save_image( + base64_data=resource.blob, + tool_call_id=func_tool_id, + tool_name=func_tool_name, + index=content_index, + mime_type=resource.mimeType, + ) + _append_tool_call_result( + func_tool_id, + ( + f"Image returned and cached at path='{cached_img.file_path}'. " + f"Review the image below. Use send_message_to_user to send it to the user if satisfied, " + f"with type='image' and path='{cached_img.file_path}'." + ), + ) + # Yield image info for LLM visibility + yield _HandleFunctionToolsResult.from_cached_image( + cached_img + ) + else: + _append_tool_call_result( + func_tool_id, + "The tool has returned a data type that is not supported.", + ) elif resp is None: # Tool 直接请求发送消息给用户 From 8e6c835b85b499be457caf2a3e9d8dbfde3d4c5c Mon Sep 17 00:00:00 2001 From: ccsang Date: Sun, 15 Mar 2026 23:49:02 +0000 Subject: [PATCH 2/2] refactor(agent): extract image-handling logic into helper function Address Sourcery AI review feedback: the image-handling logic was duplicated for ImageContent and EmbeddedResource cases. Changes: - Extract _handle_image_content() helper function - Consolidate image caching, result appending, and yielding logic - Reduce code duplication and improve maintainability --- .../agent/runners/tool_loop_agent_runner.py | 61 ++++++++++--------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/astrbot/core/agent/runners/tool_loop_agent_runner.py b/astrbot/core/agent/runners/tool_loop_agent_runner.py index 4d99f98049..9351b20e1a 100644 --- a/astrbot/core/agent/runners/tool_loop_agent_runner.py +++ b/astrbot/core/agent/runners/tool_loop_agent_runner.py @@ -665,6 +665,31 @@ def _append_tool_call_result(tool_call_id: str, content: str) -> None: ), ) + def _handle_image_content( + base64_data: str, + mime_type: str, + tool_call_id: str, + tool_name: str, + content_index: int, + ) -> _HandleFunctionToolsResult: + """Helper to cache image and return result for LLM visibility.""" + cached_img = tool_image_cache.save_image( + base64_data=base64_data, + tool_call_id=tool_call_id, + tool_name=tool_name, + index=content_index, + mime_type=mime_type, + ) + _append_tool_call_result( + tool_call_id, + ( + f"Image returned and cached at path='{cached_img.file_path}'. " + f"Review the image below. Use send_message_to_user to send it to the user if satisfied, " + f"with type='image' and path='{cached_img.file_path}'." + ), + ) + return _HandleFunctionToolsResult.from_cached_image(cached_img) + # 执行函数调用 for func_tool_name, func_tool_args, func_tool_id in zip( llm_response.tools_call_name, @@ -767,24 +792,12 @@ def _append_tool_call_result(tool_call_id: str, content: str) -> None: ) elif isinstance(content, ImageContent): # Cache the image instead of sending directly - cached_img = tool_image_cache.save_image( + yield _handle_image_content( base64_data=content.data, + mime_type=content.mimeType or "image/png", tool_call_id=func_tool_id, tool_name=func_tool_name, - index=content_index, - mime_type=content.mimeType or "image/png", - ) - _append_tool_call_result( - func_tool_id, - ( - f"Image returned and cached at path='{cached_img.file_path}'. " - f"Review the image below. Use send_message_to_user to send it to the user if satisfied, " - f"with type='image' and path='{cached_img.file_path}'." - ), - ) - # Yield image info for LLM visibility (will be handled in step()) - yield _HandleFunctionToolsResult.from_cached_image( - cached_img + content_index=content_index, ) elif isinstance(content, EmbeddedResource): resource = content.resource @@ -799,24 +812,12 @@ def _append_tool_call_result(tool_call_id: str, content: str) -> None: and resource.mimeType.startswith("image/") ): # Cache the image instead of sending directly - cached_img = tool_image_cache.save_image( + yield _handle_image_content( base64_data=resource.blob, + mime_type=resource.mimeType, tool_call_id=func_tool_id, tool_name=func_tool_name, - index=content_index, - mime_type=resource.mimeType, - ) - _append_tool_call_result( - func_tool_id, - ( - f"Image returned and cached at path='{cached_img.file_path}'. " - f"Review the image below. Use send_message_to_user to send it to the user if satisfied, " - f"with type='image' and path='{cached_img.file_path}'." - ), - ) - # Yield image info for LLM visibility - yield _HandleFunctionToolsResult.from_cached_image( - cached_img + content_index=content_index, ) else: _append_tool_call_result(