From 89d4ecc59b5157a4d1d09da2d456692127eb0405 Mon Sep 17 00:00:00 2001
From: Mohamed Akbarally <mohamed.akbarally@gmail.com>
Date: Wed, 11 Mar 2026 08:29:45 +0000
Subject: [PATCH 1/4] add mcp image and audio support

---
 src/stirrup/tools/mcp.py      | 56 ++++++++++++++++++----
 tests/test_mcp_image_smoke.py | 87 +++++++++++++++++++++++++++++++++++
 2 files changed, 133 insertions(+), 10 deletions(-)
 create mode 100644 tests/test_mcp_image_smoke.py

diff --git a/src/stirrup/tools/mcp.py b/src/stirrup/tools/mcp.py
index 399bb0e..18f30a3 100644
--- a/src/stirrup/tools/mcp.py
+++ b/src/stirrup/tools/mcp.py
@@ -37,7 +37,16 @@
 from json_schema_to_pydantic import create_model
 from pydantic import BaseModel, Field, model_validator
 
-from stirrup.core.models import Tool, ToolProvider, ToolResult, ToolUseCountMetadata
+from stirrup.core.models import (
+    AudioContentBlock,
+    Content,
+    ContentBlock,
+    ImageContentBlock,
+    Tool,
+    ToolProvider,
+    ToolResult,
+    ToolUseCountMetadata,
+)
 
 # MCP imports (optional dependency)
 try:
@@ -45,6 +54,15 @@
     from mcp.client.sse import sse_client
     from mcp.client.stdio import stdio_client
     from mcp.client.streamable_http import streamablehttp_client
+    from mcp.types import (
+        AudioContent as MCPAudioContent,
+    )
+    from mcp.types import (
+        ImageContent as MCPImageContent,
+    )
+    from mcp.types import (
+        TextContent as MCPTextContent,
+    )
 except ImportError as e:
     raise ImportError(
         "Requires installation of the mcp extra. Install with (for example): `uv pip install stirrup[mcp]` or `uv add stirrup[mcp]`",
@@ -345,7 +363,29 @@ def all_tools(self) -> dict[str, list[str]]:
         """
         return {server: [t["name"] for t in tools] for server, tools in self._tools.items()}
 
-    async def call_tool(self, server: str, tool_name: str, arguments: dict[str, Any]) -> str:
+    def _convert_mcp_content(self, content_blocks: list[Any]) -> Content:
+        """Convert MCP content blocks into Stirrup content.
+
+        Text becomes plain strings and image/audio blocks are wrapped in the
+        matching Stirrup block. Any other block type is replaced by a short text
+        placeholder so one unsupported block does not discard the whole result.
+        Returns "" when empty and a bare string for a single text block.
+        """
+        content: list[ContentBlock] = []
+        for block in content_blocks:
+            if isinstance(block, MCPTextContent):
+                content.append(block.text)
+            elif isinstance(block, MCPImageContent):
+                content.append(ImageContentBlock(data=block.data))
+            elif isinstance(block, MCPAudioContent):
+                content.append(AudioContentBlock(data=block.data))
+            else:
+                content.append(f"[unsupported MCP content block: {type(block).__name__}]")
+        if len(content) == 1 and isinstance(content[0], str):
+            return content[0]
+        return content or ""
+
+    async def call_tool(self, server: str, tool_name: str, arguments: dict[str, Any]) -> Content:
         """Call a tool on a specific MCP server.
 
         Args:
@@ -354,7 +394,7 @@ async def call_tool(self, server: str, tool_name: str, arguments: dict[str, Any]
             arguments: Arguments to pass to the tool.
 
         Returns:
-            Tool result as a string (text content extracted from response).
+            Tool result converted into Stirrup content blocks.
 
         Raises:
             ValueError: If server is not connected.
@@ -364,10 +404,7 @@ async def call_tool(self, server: str, tool_name: str, arguments: dict[str, Any]
             raise ValueError(f"Server '{server}' not connected. Available: {self.servers}")
 
         result = await session.call_tool(tool_name, arguments)
-
-        # Extract text content from result
-        text_parts = [str(content.text) for content in result.content if hasattr(content, "text")]
-        return "\n".join(text_parts)
+        return self._convert_mcp_content(result.content)
 
     def get_all_tools(self) -> list[Tool[Any, ToolUseCountMetadata]]:
         """Get individual Tool objects for each tool from all connected MCP servers.
@@ -403,15 +440,14 @@ async def executor(
                     _tool: str = mcp_tool_name,
                 ) -> ToolResult[ToolUseCountMetadata]:
                     content = await self.call_tool(_server, _tool, params.model_dump())
-                    xml_content = f"<mcp_result>\n{content}\n</mcp_result>"
-                    return ToolResult(content=xml_content, metadata=ToolUseCountMetadata())
+                    return ToolResult(content=content, metadata=ToolUseCountMetadata())
 
                 tools.append(
                     Tool(
                         name=unique_name,
                         description=tool_info.get("description") or f"Tool '{mcp_tool_name}' from {server_name}",
                         parameters=params_model,
-                        executor=executor,  # ty: ignore[invalid-argument-type]
+                        executor=executor,
                     )
                 )
 
diff --git a/tests/test_mcp_image_smoke.py b/tests/test_mcp_image_smoke.py
new file mode 100644
index 0000000..67fe367
--- /dev/null
+++ b/tests/test_mcp_image_smoke.py
@@ -0,0 +1,87 @@
+"""Smoke test for MCP image tool results."""
+
+import base64
+import inspect
+import sys
+from io import BytesIO
+from pathlib import Path
+from typing import cast
+
+import pytest
+from PIL import Image
+
+from stirrup.clients.utils import to_openai_messages
+from stirrup.core.models import ImageContentBlock, ToolMessage, ToolResult, ToolUseCountMetadata
+from stirrup.tools.mcp import MCPConfig, MCPToolProvider
+
+pytest.importorskip("mcp.server.fastmcp")
+
+
+def _png_b64() -> str:
+    img = Image.new("RGB", (1, 1), color=(255, 0, 0))
+    buffer = BytesIO()
+    img.save(buffer, format="PNG")
+    return base64.b64encode(buffer.getvalue()).decode("ascii")
+
+
+def _write_image_server(script_path: Path) -> None:
+    png_b64 = _png_b64()
+    script_path.write_text(
+        f"""
+import base64
+
+from mcp.server.fastmcp import FastMCP, Image
+
+mcp = FastMCP("image-server")
+
+
+@mcp.tool()
+def read_image() -> Image:
+    return Image(data=base64.b64decode("{png_b64}"), format="png")
+
+
+if __name__ == "__main__":
+    mcp.run(transport="stdio")
+""".strip()
+    )
+
+
+def _make_provider(script_path: Path) -> MCPToolProvider:
+    config = MCPConfig.model_validate(
+        {
+            "mcpServers": {
+                "image_server": {
+                    "command": sys.executable,
+                    "args": [str(script_path)],
+                }
+            }
+        }
+    )
+    return MCPToolProvider(config=config)
+
+
+async def test_mcp_image_result_reaches_openai_message(tmp_path: Path) -> None:
+    script_path = tmp_path / "image_server.py"
+    _write_image_server(script_path)
+
+    provider = _make_provider(script_path)
+    async with provider as tools:
+        tool = next(tool for tool in tools if tool.name == "image_server__read_image")
+        executor_result = tool.executor(tool.parameters())
+        raw_result = await executor_result if inspect.isawaitable(executor_result) else executor_result
+        result = cast(ToolResult[ToolUseCountMetadata], raw_result)
+
+    assert isinstance(result.content, list)
+    assert len(result.content) == 1
+    assert isinstance(result.content[0], ImageContentBlock)
+
+    messages = to_openai_messages(
+        [
+            ToolMessage(
+                content=result.content,
+                tool_call_id="call_1",
+                name="image_server__read_image",
+            )
+        ]
+    )
+    assert messages[0]["content"][0]["type"] == "image_url"

From d7239099d831b76da2c67b10ee87f25e394bb3a5 Mon Sep 17 00:00:00 2001
From: Mohamed Akbarally <mohamed.akbarally@gmail.com>
Date: Wed, 11 Mar 2026 08:49:52 +0000
Subject: [PATCH 2/4] add webp support & improve tests

---
 src/stirrup/core/models.py    |  1 +
 tests/test_agent.py           | 50 +++++++++++++++++++++++++++++++++++
 tests/test_mcp_image_smoke.py | 41 +++++++++++++++++++++-------
 3 files changed, 82 insertions(+), 10 deletions(-)

diff --git a/src/stirrup/core/models.py b/src/stirrup/core/models.py
index c3a9768..2386434 100644
--- a/src/stirrup/core/models.py
+++ b/src/stirrup/core/models.py
@@ -118,6 +118,7 @@ class ImageContentBlock(BinaryContentBlock):
     allowed_mime_types: ClassVar[set[str]] = {
         "image/jpeg",  # JPEG
         "image/png",  # PNG
+        "image/webp",  # WebP
         "image/gif",  # GIF
         "image/bmp",  # BMP
         "image/tiff",  # TIFF
diff --git a/tests/test_agent.py b/tests/test_agent.py
index f8af13e..d2921fb 100644
--- a/tests/test_agent.py
+++ b/tests/test_agent.py
@@ -1,5 +1,8 @@
 """Tests for agent core functionality."""
 
+from io import BytesIO
+
+from PIL import Image
 from pydantic import BaseModel
 
 from stirrup.constants import FINISH_TOOL_NAME
@@ -7,6 +10,7 @@
 from stirrup.core.models import (
     AssistantMessage,
     ChatMessage,
+    ImageContentBlock,
     LLMClient,
     SummaryMessage,
     SystemMessage,
@@ -42,6 +46,13 @@ async def generate(self, messages: list[ChatMessage], tools: dict[str, Tool]) ->
         return response
 
 
+def _sample_png_block() -> ImageContentBlock:
+    """Return an image block holding a freshly encoded 1x1 red PNG."""
+    png_bytes = BytesIO()
+    Image.new("RGB", (1, 1), color=(255, 0, 0)).save(png_bytes, format="PNG")
+    return ImageContentBlock(data=png_bytes.getvalue())
+
+
 async def test_agent_basic_finish() -> None:
     """Test agent completes successfully when finish tool is called."""
     # Create mock responses
@@ -212,6 +223,45 @@ def echo_executor(params: EchoParams) -> ToolResult:
     assert "Echo: Hello" in echo_messages[0].content
 
 
+async def test_run_tool_preserves_image_content() -> None:
+    """Check that an image block returned by a tool is still an image block after run_tool."""
+
+    class EmptyParams(BaseModel):
+        pass
+
+    png_block = _sample_png_block()
+
+    def return_png(_params: EmptyParams) -> ToolResult:
+        return ToolResult(content=[png_block])
+
+    # The tool is run directly through the session; the mock client is given no responses.
+    agent = Agent(
+        client=MockLLMClient([]),
+        name="test-agent",
+        max_turns=1,
+        tools=[
+            Tool[EmptyParams, None](
+                name="image_tool",
+                description="Return an image",
+                parameters=EmptyParams,
+                executor=return_png,  # ty: ignore[invalid-argument-type]
+            )
+        ],
+        finish_tool=SIMPLE_FINISH_TOOL,
+    )
+
+    call = ToolCall(name="image_tool", arguments="{}", tool_call_id="call_1")
+
+    async with agent.session() as session:
+        message = await session.run_tool(call, run_metadata={})
+
+    blocks = message.content
+    assert isinstance(blocks, list)
+    assert len(blocks) == 1
+    assert isinstance(blocks[0], ImageContentBlock)
+    assert blocks[0].mime_type == "image/png"
+
+
 async def test_agent_invalid_tool_call() -> None:
     """Test agent handles invalid tool calls gracefully."""
     # Create mock responses
diff --git a/tests/test_mcp_image_smoke.py b/tests/test_mcp_image_smoke.py
index 67fe367..0069323 100644
--- a/tests/test_mcp_image_smoke.py
+++ b/tests/test_mcp_image_smoke.py
@@ -17,15 +17,18 @@
 pytest.importorskip("mcp.server.fastmcp")
 
 
-def _png_b64() -> str:
+def _image_b64(image_format: str) -> str:
+    """Build a tiny real image payload for the temp MCP server."""
     img = Image.new("RGB", (1, 1), color=(255, 0, 0))
     buffer = BytesIO()
-    img.save(buffer, format="PNG")
+    img.save(buffer, format=image_format)
     return base64.b64encode(buffer.getvalue()).decode("ascii")
 
 
-def _write_image_server(script_path: Path) -> None:
-    png_b64 = _png_b64()
+def _write_image_server(script_path: Path, image_format: str, tool_name: str) -> None:
+    """Write a one-file stdio MCP server with a single image-returning tool."""
+    image_b64 = _image_b64(image_format)
+    image_ext = image_format.lower()
     script_path.write_text(
         f"""
 import base64
@@ -36,8 +39,8 @@ def _write_image_server(script_path: Path) -> None:
 
 
 @mcp.tool()
-def read_image() -> Image:
-    return Image(data=base64.b64decode("{png_b64}"), format="png")
+def {tool_name}() -> Image:
+    return Image(data=base64.b64decode("{image_b64}"), format="{image_ext}")
 
 
 if __name__ == "__main__":
@@ -47,6 +50,7 @@ def read_image() -> Image:
 
 
 def _make_provider(script_path: Path) -> MCPToolProvider:
+    """Create a provider that launches the temp MCP server over stdio."""
     config = MCPConfig.model_validate(
         {
             "mcpServers": {
@@ -60,28 +64,45 @@ def _make_provider(script_path: Path) -> MCPToolProvider:
     return MCPToolProvider(config=config)
 
 
-async def test_mcp_image_result_reaches_openai_message(tmp_path: Path) -> None:
+async def _assert_tool_returns_image(
+    tmp_path: Path,
+    *,
+    image_format: str,
+    tool_name: str,
+) -> None:
+    """Assert the MCP bridge preserves an image through OpenAI-style serialization."""
     script_path = tmp_path / "image_server.py"
-    _write_image_server(script_path)
+    _write_image_server(script_path, image_format=image_format, tool_name=tool_name)
 
     provider = _make_provider(script_path)
     async with provider as tools:
-        tool = next(tool for tool in tools if tool.name == "image_server__read_image")
+        tool = next(tool for tool in tools if tool.name == f"image_server__{tool_name}")
         executor_result = tool.executor(tool.parameters())
         raw_result = await executor_result if inspect.isawaitable(executor_result) else executor_result
         result = cast(ToolResult[ToolUseCountMetadata], raw_result)
 
+    # First prove the MCP bridge produced a real Stirrup image block.
     assert isinstance(result.content, list)
     assert len(result.content) == 1
     assert isinstance(result.content[0], ImageContentBlock)
+    assert result.content[0].mime_type == f"image/{image_format.lower()}"
 
+    # Then prove the image still survives message serialization for the model layer.
     messages = to_openai_messages(
         [
             ToolMessage(
                 content=result.content,
                 tool_call_id="call_1",
-                name="image_server__read_image",
+                name=f"image_server__{tool_name}",
             )
         ]
     )
     assert messages[0]["content"][0]["type"] == "image_url"
+
+
+async def test_mcp_png_result_reaches_openai_message(tmp_path: Path) -> None:
+    await _assert_tool_returns_image(tmp_path, image_format="PNG", tool_name="read_png")
+
+
+async def test_mcp_webp_result_reaches_openai_message(tmp_path: Path) -> None:
+    await _assert_tool_returns_image(tmp_path, image_format="WEBP", tool_name="read_webp")

From 941455f0e129300604f5416b884c5702c542c89f Mon Sep 17 00:00:00 2001
From: Mohamed Akbarally <mohamed.akbarally@gmail.com>
Date: Wed, 11 Mar 2026 08:59:49 +0000
Subject: [PATCH 3/4] add AssistantMessage metadata support

---
 src/stirrup/clients/utils.py |  2 ++
 src/stirrup/core/models.py   |  1 +
 tests/test_clients_utils.py  | 22 ++++++++++++++++++++++
 3 files changed, 25 insertions(+)
 create mode 100644 tests/test_clients_utils.py

diff --git a/src/stirrup/clients/utils.py b/src/stirrup/clients/utils.py
index aa90e31..0ab46f3 100644
--- a/src/stirrup/clients/utils.py
+++ b/src/stirrup/clients/utils.py
@@ -124,6 +124,8 @@ def to_openai_messages(msgs: list[ChatMessage]) -> list[dict[str, Any]]:
             out.append({"role": "user", "content": content_to_openai(m.content)})
         elif isinstance(m, AssistantMessage):
             msg: dict[str, Any] = {"role": "assistant", "content": content_to_openai(m.content)}
+            if m.metadata:
+                msg["metadata"] = m.metadata
 
             if m.reasoning:
                 if m.reasoning.content:
diff --git a/src/stirrup/core/models.py b/src/stirrup/core/models.py
index 2386434..5294d00 100644
--- a/src/stirrup/core/models.py
+++ b/src/stirrup/core/models.py
@@ -622,6 +622,7 @@ class AssistantMessage(BaseModel):
     content: Content
     tool_calls: Annotated[list[ToolCall], Field(default_factory=list)]
     token_usage: Annotated[TokenUsage, Field(default_factory=TokenUsage)]
+    metadata: Annotated[dict[str, Any], Field(default_factory=dict)]
     request_start_time: float | None = None
     request_end_time: float | None = None
 
diff --git a/tests/test_clients_utils.py b/tests/test_clients_utils.py
new file mode 100644
index 0000000..24f0a93
--- /dev/null
+++ b/tests/test_clients_utils.py
@@ -0,0 +1,22 @@
+"""Tests for OpenAI client utility helpers."""
+
+from stirrup.clients.utils import to_openai_messages
+from stirrup.core.models import AssistantMessage, TokenUsage
+
+
+def test_to_openai_messages_forwards_assistant_metadata() -> None:
+    message = AssistantMessage(
+        content="Hello",
+        token_usage=TokenUsage(),
+        metadata={"source": "cache", "attempt": 2},
+    )
+
+    result = to_openai_messages([message])
+
+    assert result == [
+        {
+            "role": "assistant",
+            "content": [{"type": "text", "text": "Hello"}],
+            "metadata": {"source": "cache", "attempt": 2},
+        }
+    ]

From f1eaa4324a329d9794887f0ea0e7b130cb0ad1f8 Mon Sep 17 00:00:00 2001
From: Mohamed Akbarally <mohamed.akbarally@gmail.com>
Date: Wed, 11 Mar 2026 09:03:15 +0000
Subject: [PATCH 4/4] add generated ids to AssistantMessage

---
 src/stirrup/core/models.py  |  2 ++
 tests/test_clients_utils.py | 15 +++++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/src/stirrup/core/models.py b/src/stirrup/core/models.py
index 5294d00..4246150 100644
--- a/src/stirrup/core/models.py
+++ b/src/stirrup/core/models.py
@@ -11,6 +11,7 @@
 from tempfile import NamedTemporaryFile
 from types import TracebackType
 from typing import Annotated, Any, ClassVar, Literal, Protocol, Self, overload, runtime_checkable
+from uuid import uuid4
 
 import filetype
 from moviepy import AudioFileClip, VideoFileClip
@@ -617,6 +618,7 @@ class Reasoning(BaseModel):
 class AssistantMessage(BaseModel):
     """LLM response message with optional tool calls and token usage tracking."""
 
+    id: str = Field(default_factory=lambda: uuid4().hex)
     role: Literal["assistant"] = "assistant"
     reasoning: Reasoning | None = None
     content: Content
diff --git a/tests/test_clients_utils.py b/tests/test_clients_utils.py
index 24f0a93..7c3759a 100644
--- a/tests/test_clients_utils.py
+++ b/tests/test_clients_utils.py
@@ -4,9 +4,24 @@
 from stirrup.core.models import AssistantMessage, TokenUsage
 
 
+def test_assistant_message_generates_id() -> None:
+    """Each AssistantMessage built without an explicit id gets its own non-empty id."""
+    ids = [
+        AssistantMessage(content=text, tool_calls=[], token_usage=TokenUsage(), metadata={}).id
+        for text in ("Hello", "Hello again")
+    ]
+    first_id, second_id = ids
+
+    # Ids must be non-empty and must differ between independently built messages.
+    assert first_id
+    assert second_id
+    assert first_id != second_id
+
+
 def test_to_openai_messages_forwards_assistant_metadata() -> None:
     message = AssistantMessage(
         content="Hello",
+        tool_calls=[],
         token_usage=TokenUsage(),
         metadata={"source": "cache", "attempt": 2},
     )