From 89d4ecc59b5157a4d1d09da2d456692127eb0405 Mon Sep 17 00:00:00 2001 From: Mohamed Akbarally Date: Wed, 11 Mar 2026 08:29:45 +0000 Subject: [PATCH 1/4] add mcp image support --- src/stirrup/tools/mcp.py | 56 ++++++++++++++++++---- tests/test_mcp_image_smoke.py | 87 +++++++++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+), 10 deletions(-) create mode 100644 tests/test_mcp_image_smoke.py diff --git a/src/stirrup/tools/mcp.py b/src/stirrup/tools/mcp.py index 399bb0e..18f30a3 100644 --- a/src/stirrup/tools/mcp.py +++ b/src/stirrup/tools/mcp.py @@ -37,7 +37,16 @@ from json_schema_to_pydantic import create_model from pydantic import BaseModel, Field, model_validator -from stirrup.core.models import Tool, ToolProvider, ToolResult, ToolUseCountMetadata +from stirrup.core.models import ( + AudioContentBlock, + Content, + ContentBlock, + ImageContentBlock, + Tool, + ToolProvider, + ToolResult, + ToolUseCountMetadata, +) # MCP imports (optional dependency) try: @@ -45,6 +54,15 @@ from mcp.client.sse import sse_client from mcp.client.stdio import stdio_client from mcp.client.streamable_http import streamablehttp_client + from mcp.types import ( + AudioContent as MCPAudioContent, + ) + from mcp.types import ( + ImageContent as MCPImageContent, + ) + from mcp.types import ( + TextContent as MCPTextContent, + ) except ImportError as e: raise ImportError( "Requires installation of the mcp extra. Install with (for example): `uv pip install stirrup[mcp]` or `uv add stirrup[mcp]`", @@ -345,7 +363,29 @@ def all_tools(self) -> dict[str, list[str]]: """ return {server: [t["name"] for t in tools] for server, tools in self._tools.items()} - async def call_tool(self, server: str, tool_name: str, arguments: dict[str, Any]) -> str: + def _convert_mcp_content(self, content_blocks: list[Any]) -> Content: + """Convert MCP content blocks into Stirrup content blocks.""" + content: list[ContentBlock] = [] + + for block in content_blocks: + if isinstance(block, MCPTextContent): + content.append(block.text) + continue + if isinstance(block, MCPImageContent): + content.append(ImageContentBlock(data=block.data)) + continue + if isinstance(block, MCPAudioContent): + content.append(AudioContentBlock(data=block.data)) + continue + raise TypeError(f"Unsupported MCP content block: {type(block).__name__}") + + if not content: + return "" + if len(content) == 1 and isinstance(content[0], str): + return content[0] + return content + + async def call_tool(self, server: str, tool_name: str, arguments: dict[str, Any]) -> Content: """Call a tool on a specific MCP server. Args: @@ -354,7 +394,7 @@ async def call_tool(self, server: str, tool_name: str, arguments: dict[str, Any] arguments: Arguments to pass to the tool. Returns: - Tool result as a string (text content extracted from response). + Tool result converted into Stirrup content blocks. Raises: ValueError: If server is not connected. @@ -364,10 +404,7 @@ async def call_tool(self, server: str, tool_name: str, arguments: dict[str, Any] raise ValueError(f"Server '{server}' not connected. Available: {self.servers}") result = await session.call_tool(tool_name, arguments) - - # Extract text content from result - text_parts = [str(content.text) for content in result.content if hasattr(content, "text")] - return "\n".join(text_parts) + return self._convert_mcp_content(result.content) def get_all_tools(self) -> list[Tool[Any, ToolUseCountMetadata]]: """Get individual Tool objects for each tool from all connected MCP servers. @@ -403,15 +440,14 @@ async def executor( _tool: str = mcp_tool_name, ) -> ToolResult[ToolUseCountMetadata]: content = await self.call_tool(_server, _tool, params.model_dump()) - xml_content = f"\n{content}\n" - return ToolResult(content=xml_content, metadata=ToolUseCountMetadata()) + return ToolResult(content=content, metadata=ToolUseCountMetadata()) tools.append( Tool( name=unique_name, description=tool_info.get("description") or f"Tool '{mcp_tool_name}' from {server_name}", parameters=params_model, - executor=executor, # ty: ignore[invalid-argument-type] + executor=executor, ) ) diff --git a/tests/test_mcp_image_smoke.py b/tests/test_mcp_image_smoke.py new file mode 100644 index 0000000..67fe367 --- /dev/null +++ b/tests/test_mcp_image_smoke.py @@ -0,0 +1,87 @@ +"""Smoke test for MCP image tool results.""" + +import base64 +import inspect +import sys +from io import BytesIO +from pathlib import Path +from typing import cast + +import pytest +from PIL import Image + +from stirrup.clients.utils import to_openai_messages +from stirrup.core.models import ImageContentBlock, ToolMessage, ToolResult, ToolUseCountMetadata +from stirrup.tools.mcp import MCPConfig, MCPToolProvider + +pytest.importorskip("mcp.server.fastmcp") + + +def _png_b64() -> str: + img = Image.new("RGB", (1, 1), color=(255, 0, 0)) + buffer = BytesIO() + img.save(buffer, format="PNG") + return base64.b64encode(buffer.getvalue()).decode("ascii") + + +def _write_image_server(script_path: Path) -> None: + png_b64 = _png_b64() + script_path.write_text( + f""" +import base64 + +from mcp.server.fastmcp import FastMCP, Image + +mcp = FastMCP("image-server") + + +@mcp.tool() +def read_image() -> Image: + return Image(data=base64.b64decode("{png_b64}"), format="png") + + +if __name__ == "__main__": + mcp.run(transport="stdio") +""".strip() + ) + + +def _make_provider(script_path: Path) -> MCPToolProvider: + config = MCPConfig.model_validate( + { + "mcpServers": { + "image_server": { + "command": sys.executable, + "args": [str(script_path)], + } + } + } + ) + return MCPToolProvider(config=config) + + +async def test_mcp_image_result_reaches_openai_message(tmp_path: Path) -> None: + script_path = tmp_path / "image_server.py" + _write_image_server(script_path) + + provider = _make_provider(script_path) + async with provider as tools: + tool = next(tool for tool in tools if tool.name == "image_server__read_image") + executor_result = tool.executor(tool.parameters()) + raw_result = await executor_result if inspect.isawaitable(executor_result) else executor_result + result = cast(ToolResult[ToolUseCountMetadata], raw_result) + + assert isinstance(result.content, list) + assert len(result.content) == 1 + assert isinstance(result.content[0], ImageContentBlock) + + messages = to_openai_messages( + [ + ToolMessage( + content=result.content, + tool_call_id="call_1", + name="image_server__read_image", + ) + ] + ) + assert messages[0]["content"][0]["type"] == "image_url" From d7239099d831b76da2c67b10ee87f25e394bb3a5 Mon Sep 17 00:00:00 2001 From: Mohamed Akbarally Date: Wed, 11 Mar 2026 08:49:52 +0000 Subject: [PATCH 2/4] add webp support & improve tests --- src/stirrup/core/models.py | 1 + tests/test_agent.py | 50 +++++++++++++++++++++++++++++++++++ tests/test_mcp_image_smoke.py | 41 +++++++++++++++++++++------- 3 files changed, 82 insertions(+), 10 deletions(-) diff --git a/src/stirrup/core/models.py b/src/stirrup/core/models.py index c3a9768..2386434 100644 --- a/src/stirrup/core/models.py +++ b/src/stirrup/core/models.py @@ -118,6 +118,7 @@ class ImageContentBlock(BinaryContentBlock): allowed_mime_types: ClassVar[set[str]] = { "image/jpeg", # JPEG "image/png", # PNG + "image/webp", # WebP "image/gif", # GIF "image/bmp", # BMP "image/tiff", # TIFF diff --git a/tests/test_agent.py b/tests/test_agent.py index f8af13e..d2921fb 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -1,5 +1,8 @@ """Tests for agent core functionality.""" +from io import BytesIO + +from PIL import Image from pydantic import BaseModel from stirrup.constants import FINISH_TOOL_NAME @@ -7,6 +10,7 @@ from stirrup.core.models import ( AssistantMessage, ChatMessage, + ImageContentBlock, LLMClient, SummaryMessage, SystemMessage, @@ -42,6 +46,13 @@ async def generate(self, messages: list[ChatMessage], tools: dict[str, Tool]) -> return response +def _sample_png_block() -> ImageContentBlock: + img = Image.new("RGB", (1, 1), color=(255, 0, 0)) + buffer = BytesIO() + img.save(buffer, format="PNG") + return ImageContentBlock(data=buffer.getvalue()) + + async def test_agent_basic_finish() -> None: """Test agent completes successfully when finish tool is called.""" # Create mock responses @@ -212,6 +223,45 @@ def echo_executor(params: EchoParams) -> ToolResult: assert "Echo: Hello" in echo_messages[0].content +async def test_run_tool_preserves_image_content() -> None: + """Test run_tool preserves image blocks returned by tools.""" + + class EmptyParams(BaseModel): + pass + + image_block = _sample_png_block() + + def image_executor(_params: EmptyParams) -> ToolResult: + return ToolResult(content=[image_block]) + + image_tool = Tool[EmptyParams, None]( + name="image_tool", + description="Return an image", + parameters=EmptyParams, + executor=image_executor, # ty: ignore[invalid-argument-type] + ) + + client = MockLLMClient([]) + agent = Agent( + client=client, + name="test-agent", + max_turns=1, + tools=[image_tool], + finish_tool=SIMPLE_FINISH_TOOL, + ) + + async with agent.session() as session: + tool_message = await session.run_tool( + ToolCall(name="image_tool", arguments="{}", tool_call_id="call_1"), + run_metadata={}, + ) + + assert isinstance(tool_message.content, list) + assert len(tool_message.content) == 1 + assert isinstance(tool_message.content[0], ImageContentBlock) + assert tool_message.content[0].mime_type == "image/png" + + async def test_agent_invalid_tool_call() -> None: """Test agent handles invalid tool calls gracefully.""" # Create mock responses diff --git a/tests/test_mcp_image_smoke.py b/tests/test_mcp_image_smoke.py index 67fe367..0069323 100644 --- a/tests/test_mcp_image_smoke.py +++ b/tests/test_mcp_image_smoke.py @@ -17,15 +17,18 @@ pytest.importorskip("mcp.server.fastmcp") -def _png_b64() -> str: +def _image_b64(image_format: str) -> str: + """Build a tiny real image payload for the temp MCP server.""" img = Image.new("RGB", (1, 1), color=(255, 0, 0)) buffer = BytesIO() - img.save(buffer, format="PNG") + img.save(buffer, format=image_format) return base64.b64encode(buffer.getvalue()).decode("ascii") -def _write_image_server(script_path: Path) -> None: - png_b64 = _png_b64() +def _write_image_server(script_path: Path, image_format: str, tool_name: str) -> None: + """Write a one-file stdio MCP server with a single image-returning tool.""" + image_b64 = _image_b64(image_format) + image_ext = image_format.lower() script_path.write_text( f""" import base64 @@ -36,8 +39,8 @@ def _write_image_server(script_path: Path) -> None: @mcp.tool() -def read_image() -> Image: - return Image(data=base64.b64decode("{png_b64}"), format="png") +def {tool_name}() -> Image: + return Image(data=base64.b64decode("{image_b64}"), format="{image_ext}") if __name__ == "__main__": @@ -47,6 +50,7 @@ def read_image() -> Image: def _make_provider(script_path: Path) -> MCPToolProvider: + """Create a provider that launches the temp MCP server over stdio.""" config = MCPConfig.model_validate( { "mcpServers": { @@ -60,28 +64,45 @@ def _make_provider(script_path: Path) -> MCPToolProvider: return MCPToolProvider(config=config) -async def test_mcp_image_result_reaches_openai_message(tmp_path: Path) -> None: +async def _assert_tool_returns_image( + tmp_path: Path, + *, + image_format: str, + tool_name: str, +) -> None: + """Assert the MCP bridge preserves an image through OpenAI-style serialization.""" script_path = tmp_path / "image_server.py" - _write_image_server(script_path) + _write_image_server(script_path, image_format=image_format, tool_name=tool_name) provider = _make_provider(script_path) async with provider as tools: - tool = next(tool for tool in tools if tool.name == "image_server__read_image") + tool = next(tool for tool in tools if tool.name == f"image_server__{tool_name}") executor_result = tool.executor(tool.parameters()) raw_result = await executor_result if inspect.isawaitable(executor_result) else executor_result result = cast(ToolResult[ToolUseCountMetadata], raw_result) + # First prove the MCP bridge produced a real Stirrup image block. assert isinstance(result.content, list) assert len(result.content) == 1 assert isinstance(result.content[0], ImageContentBlock) + assert result.content[0].mime_type == f"image/{image_format.lower()}" + # Then prove the image still survives message serialization for the model layer. messages = to_openai_messages( [ ToolMessage( content=result.content, tool_call_id="call_1", - name="image_server__read_image", + name=f"image_server__{tool_name}", ) ] ) assert messages[0]["content"][0]["type"] == "image_url" + + +async def test_mcp_png_result_reaches_openai_message(tmp_path: Path) -> None: + await _assert_tool_returns_image(tmp_path, image_format="PNG", tool_name="read_png") + + +async def test_mcp_webp_result_reaches_openai_message(tmp_path: Path) -> None: + await _assert_tool_returns_image(tmp_path, image_format="WEBP", tool_name="read_webp") From 941455f0e129300604f5416b884c5702c542c89f Mon Sep 17 00:00:00 2001 From: Mohamed Akbarally Date: Wed, 11 Mar 2026 08:59:49 +0000 Subject: [PATCH 3/4] add AssistantMessage metadata support --- src/stirrup/clients/utils.py | 2 ++ src/stirrup/core/models.py | 1 + tests/test_clients_utils.py | 22 ++++++++++++++++++++++ 3 files changed, 25 insertions(+) create mode 100644 tests/test_clients_utils.py diff --git a/src/stirrup/clients/utils.py b/src/stirrup/clients/utils.py index aa90e31..0ab46f3 100644 --- a/src/stirrup/clients/utils.py +++ b/src/stirrup/clients/utils.py @@ -124,6 +124,8 @@ def to_openai_messages(msgs: list[ChatMessage]) -> list[dict[str, Any]]: out.append({"role": "user", "content": content_to_openai(m.content)}) elif isinstance(m, AssistantMessage): msg: dict[str, Any] = {"role": "assistant", "content": content_to_openai(m.content)} + if m.metadata: + msg["metadata"] = m.metadata if m.reasoning: if m.reasoning.content: diff --git a/src/stirrup/core/models.py b/src/stirrup/core/models.py index 2386434..5294d00 100644 --- a/src/stirrup/core/models.py +++ b/src/stirrup/core/models.py @@ -622,6 +622,7 @@ class AssistantMessage(BaseModel): content: Content tool_calls: Annotated[list[ToolCall], Field(default_factory=list)] token_usage: Annotated[TokenUsage, Field(default_factory=TokenUsage)] + metadata: Annotated[dict[str, Any], Field(default_factory=dict)] request_start_time: float | None = None request_end_time: float | None = None diff --git a/tests/test_clients_utils.py b/tests/test_clients_utils.py new file mode 100644 index 0000000..24f0a93 --- /dev/null +++ b/tests/test_clients_utils.py @@ -0,0 +1,22 @@ +"""Tests for OpenAI client utility helpers.""" + +from stirrup.clients.utils import to_openai_messages +from stirrup.core.models import AssistantMessage, TokenUsage + + +def test_to_openai_messages_forwards_assistant_metadata() -> None: + message = AssistantMessage( + content="Hello", + token_usage=TokenUsage(), + metadata={"source": "cache", "attempt": 2}, + ) + + result = to_openai_messages([message]) + + assert result == [ + { + "role": "assistant", + "content": [{"type": "text", "text": "Hello"}], + "metadata": {"source": "cache", "attempt": 2}, + } + ] From f1eaa4324a329d9794887f0ea0e7b130cb0ad1f8 Mon Sep 17 00:00:00 2001 From: Mohamed Akbarally Date: Wed, 11 Mar 2026 09:03:15 +0000 Subject: [PATCH 4/4] add generated ids to AssistantMessage --- src/stirrup/core/models.py | 2 ++ tests/test_clients_utils.py | 15 +++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/src/stirrup/core/models.py b/src/stirrup/core/models.py index 5294d00..4246150 100644 --- a/src/stirrup/core/models.py +++ b/src/stirrup/core/models.py @@ -11,6 +11,7 @@ from tempfile import NamedTemporaryFile from types import TracebackType from typing import Annotated, Any, ClassVar, Literal, Protocol, Self, overload, runtime_checkable +from uuid import uuid4 import filetype from moviepy import AudioFileClip, VideoFileClip @@ -617,6 +618,7 @@ class Reasoning(BaseModel): class AssistantMessage(BaseModel): """LLM response message with optional tool calls and token usage tracking.""" + id: str = Field(default_factory=lambda: uuid4().hex) role: Literal["assistant"] = "assistant" reasoning: Reasoning | None = None content: Content diff --git a/tests/test_clients_utils.py b/tests/test_clients_utils.py index 24f0a93..7c3759a 100644 --- a/tests/test_clients_utils.py +++ b/tests/test_clients_utils.py @@ -4,9 +4,24 @@ from stirrup.core.models import AssistantMessage, TokenUsage +def test_assistant_message_generates_id() -> None: + first = AssistantMessage(content="Hello", tool_calls=[], token_usage=TokenUsage(), metadata={}) + second = AssistantMessage( + content="Hello again", + tool_calls=[], + token_usage=TokenUsage(), + metadata={}, + ) + + assert first.id + assert second.id + assert first.id != second.id + + def test_to_openai_messages_forwards_assistant_metadata() -> None: message = AssistantMessage( content="Hello", + tool_calls=[], token_usage=TokenUsage(), metadata={"source": "cache", "attempt": 2}, )