From 399b4e7aa58204a9c50d1f014ce94d64a465f0c0 Mon Sep 17 00:00:00 2001
From: Clinton Phillips <clintdotphillips@gmail.com>
Date: Thu, 14 May 2026 11:05:28 -0400
Subject: [PATCH] fix(tools): preserve tool flow when caller appends unrelated
 messages
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #1536.

The tool_runner's auto-append of (assistant, tool_result) was previously
gated on a single `_messages_modified` flag that flipped to True whenever
the caller called `append_messages` inside the loop body. This conflated
two distinct use cases:

1. "Modifying tool results" — caller appends a custom tool_result to
   override the SDK's auto-generated one. Auto-append should skip.
2. "Appending an unrelated user message" (#1536) — caller appends, say,
   "be concise in your response" between iterations. The tool_result
   still needs to be threaded back into history; otherwise the next
   request omits it, the model re-asks for the same tool call, and the
   loop spins forever.

This change replaces the flag-based gate with a content-based check:
auto-append is skipped only when the history already ends with a user
message whose tool_result blocks cover every tool_use_id we were about
to write. Unrelated messages don't match, so the tool flow stays
correctly threaded.

Sync and async `__run__` are updated symmetrically. The existing
`test_custom_message_handling` xfail is preserved — it depends on
additional re-threading logic and snapshot updates that are out of
scope for this fix.
---
 src/anthropic/lib/tools/_beta_runner.py | 52 +++++++++++++++++++++-
 tests/lib/tools/test_runners.py         | 57 +++++++++++++++++++++++++
 2 files changed, 107 insertions(+), 2 deletions(-)

diff --git a/src/anthropic/lib/tools/_beta_runner.py b/src/anthropic/lib/tools/_beta_runner.py
index 52e48698..d1039560 100644
--- a/src/anthropic/lib/tools/_beta_runner.py
+++ b/src/anthropic/lib/tools/_beta_runner.py
@@ -125,6 +125,44 @@ def _should_stop(self) -> bool:
             return True
         return False
 
+    def _tool_response_already_appended(self, response: BetaMessageParam) -> bool:
+        """Return True if the caller already appended the tool results in ``response``.
+
+        ``response`` is the tool-result message the runner is about to auto-append
+        (the caller may have appended a custom tool result themselves via
+        ``append_messages``). It counts as already present when every
+        ``tool_use_id`` it carries appears in a tool_result block of the user
+        messages that trail the history, i.e. those after the most recent non-user
+        message. Scanning that whole trailing run, rather than only the last
+        message, keeps a caller-supplied tool result detectable when an unrelated
+        user message was appended after it, so it is not auto-appended twice.
+
+        Unrelated messages alone (e.g. an extra user instruction) carry no matching
+        ids, so the assistant + tool result are still auto-appended — see #1536.
+        """
+
+        def tool_result_ids(content: object) -> "set[object]":
+            # String content and non-dict blocks cannot carry a tool_result id.
+            return {
+                block.get("tool_use_id")
+                for block in (content if isinstance(content, list) else [])
+                if isinstance(block, dict) and block.get("type") == "tool_result" and block.get("tool_use_id")
+            }
+
+        missing = tool_result_ids(response.get("content"))
+        if not missing:
+            return False
+
+        # Walk the trailing user messages newest-first, ticking off ids as they are found.
+        for msg in reversed(self._params.get("messages") or []):
+            if not isinstance(msg, dict) or msg.get("role") != "user":
+                # Reached the preceding assistant turn: older results answer older calls.
+                break
+            missing -= tool_result_ids(msg.get("content"))
+            if not missing:
+                return True
+        return False
+
 
 class BaseSyncToolRunner(BaseToolRunner[BetaRunnableTool, ResponseFormatT], Generic[RunnerItemT, ResponseFormatT], ABC):
     def __init__(
@@ -278,7 +316,12 @@ def __run__(self) -> Iterator[RunnerItemT]:
                     log.debug("Tool call was not requested, exiting from tool runner loop.")
                     return
 
-                if not self._messages_modified:
+                # Auto-append the assistant + tool result unless the caller already
+                # supplied a matching tool_result via append_messages (the "modifying
+                # tool results" pattern). Callers who append unrelated messages
+                # (e.g. an extra user instruction) inside the loop body don't match
+                # this check, so the tool flow stays threaded — see #1536.
+                if not self._tool_response_already_appended(response):
                     self.append_messages(message, response)
 
             self._messages_modified = False
@@ -559,7 +602,12 @@ async def __run__(self) -> AsyncIterator[RunnerItemT]:
                     log.debug("Tool call was not requested, exiting from tool runner loop.")
                     return
 
-                if not self._messages_modified:
+                # Auto-append the assistant + tool result unless the caller already
+                # supplied a matching tool_result via append_messages (the "modifying
+                # tool results" pattern). Callers who append unrelated messages
+                # (e.g. an extra user instruction) inside the loop body don't match
+                # this check, so the tool flow stays threaded — see #1536.
+                if not self._tool_response_already_appended(response):
                     self.append_messages(message, response)
 
             self._messages_modified = False
diff --git a/tests/lib/tools/test_runners.py b/tests/lib/tools/test_runners.py
index 225fd2b5..da60829d 100644
--- a/tests/lib/tools/test_runners.py
+++ b/tests/lib/tools/test_runners.py
@@ -683,6 +683,63 @@ def _get_weather(location: str, units: Literal["c", "f"]) -> Dict[str, Any]:
         }
 
 
+def test_tool_response_already_appended_detects_matching_tool_result(client: Anthropic) -> None:
+    """Regression test for #1536.
+
+    Exercises ``_tool_response_already_appended`` directly: it must report True
+    only when the history already holds a tool_result with the same tool_use_id,
+    so that an unrelated caller-appended message (e.g. an extra user instruction)
+    never suppresses the runner's auto-append of (assistant, tool_result).
+    """
+
+    @beta_tool
+    def noop() -> BetaFunctionToolResultType:
+        """A no-op tool used for type plumbing only."""
+        return "ok"
+
+    runner = client.beta.messages.tool_runner(
+        model="claude-haiku-4-5",
+        messages=[{"role": "user", "content": "hi"}],
+        tools=[noop],
+        max_tokens=64,
+    )
+
+    matching_response: BetaMessageParam = {
+        "role": "user",
+        "content": [
+            BetaToolResultBlockParam(
+                tool_use_id="toolu_TEST",
+                content="result",
+                type="tool_result",
+            )
+        ],
+    }
+
+    # Only the initial "hi" prompt is in history: nothing to match against
+    assert runner._tool_response_already_appended(matching_response) is False
+
+    # An unrelated user message carries no tool_result blocks: still no match
+    runner.append_messages({"role": "user", "content": "be concise in your response"})
+    assert runner._tool_response_already_appended(matching_response) is False
+
+    # The caller pre-supplies the same tool_result as the last message: match
+    runner.append_messages(matching_response)
+    assert runner._tool_response_already_appended(matching_response) is True
+
+    # A response with a different tool_use_id must not match the existing result
+    different_response: BetaMessageParam = {
+        "role": "user",
+        "content": [
+            BetaToolResultBlockParam(
+                tool_use_id="toolu_OTHER",
+                content="result",
+                type="tool_result",
+            )
+        ],
+    }
+    assert runner._tool_response_already_appended(different_response) is False
+
+
 @pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"])
 def test_tool_runner_method_in_sync(sync: bool, client: Anthropic, async_client: AsyncAnthropic) -> None:
     checking_client: "Anthropic | AsyncAnthropic" = client if sync else async_client