From 399b4e7aa58204a9c50d1f014ce94d64a465f0c0 Mon Sep 17 00:00:00 2001 From: Clinton Phillips Date: Thu, 14 May 2026 11:05:28 -0400 Subject: [PATCH] fix(tools): preserve tool flow when caller appends unrelated messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #1536. The tool_runner's auto-append of (assistant, tool_result) was previously gated on a single `_messages_modified` flag that flipped to True whenever the caller called `append_messages` inside the loop body. This conflated two distinct use cases: 1. "Modifying tool results" — caller appends a custom tool_result to override the SDK's auto-generated one. Auto-append should skip. 2. "Append an unrelated user message" (#1536) — caller appends, say, "be concise in your response" between iterations. The tool_result still needs to be threaded back into history; otherwise the next request omits it, the model re-asks for the same tool call, and the loop spins forever. This change replaces the flag-based gate with a content-based check: auto-append is skipped only when the last user message in history already contains a tool_result block whose tool_use_id matches the one we were about to write. Unrelated messages don't match, so the tool flow stays correctly threaded. Sync and async `__run__` are updated symmetrically. The existing `test_custom_message_handling` xfail is preserved — it depends on additional re-threading logic and snapshot updates that are out of scope for this fix. 
--- src/anthropic/lib/tools/_beta_runner.py | 52 +++++++++++++++++++++- tests/lib/tools/test_runners.py | 57 +++++++++++++++++++++++++ 2 files changed, 107 insertions(+), 2 deletions(-) diff --git a/src/anthropic/lib/tools/_beta_runner.py b/src/anthropic/lib/tools/_beta_runner.py index 52e48698..d1039560 100644 --- a/src/anthropic/lib/tools/_beta_runner.py +++ b/src/anthropic/lib/tools/_beta_runner.py @@ -125,6 +125,44 @@ def _should_stop(self) -> bool: return True return False + def _tool_response_already_appended(self, response: BetaMessageParam) -> bool: + """Return True if the tool-result message we would auto-append has already + been added to the conversation history (e.g. the caller appended a custom + tool result themselves via ``append_messages``). + + The check matches by ``tool_use_id`` on the tool_result blocks in the last + user message. This lets callers append unrelated messages (e.g. an extra + user instruction) inside the loop body without breaking the + tool-call/tool-result threading: no tool_use_ids match, so the assistant + + tool result still get auto-appended on the next iteration. 
+ """ + response_content = response.get("content") + if not isinstance(response_content, list): + return False + response_ids = { + block.get("tool_use_id") + for block in response_content + if isinstance(block, dict) and block.get("type") == "tool_result" and block.get("tool_use_id") + } + if not response_ids: + return False + + msgs = self._params.get("messages", []) + if not msgs: + return False + last = msgs[-1] + if not isinstance(last, dict) or last.get("role") != "user": + return False + last_content = last.get("content") + if not isinstance(last_content, list): + return False + last_ids = { + block.get("tool_use_id") + for block in last_content + if isinstance(block, dict) and block.get("type") == "tool_result" and block.get("tool_use_id") + } + return response_ids.issubset(last_ids) + class BaseSyncToolRunner(BaseToolRunner[BetaRunnableTool, ResponseFormatT], Generic[RunnerItemT, ResponseFormatT], ABC): def __init__( @@ -278,7 +316,12 @@ def __run__(self) -> Iterator[RunnerItemT]: log.debug("Tool call was not requested, exiting from tool runner loop.") return - if not self._messages_modified: + # Auto-append the assistant + tool result unless the caller already + # supplied a matching tool_result via append_messages (the "modifying + # tool results" pattern). Callers who append unrelated messages + # (e.g. an extra user instruction) inside the loop body don't match + # this check, so the tool flow stays threaded — see #1536. + if not self._tool_response_already_appended(response): self.append_messages(message, response) self._messages_modified = False @@ -559,7 +602,12 @@ async def __run__(self) -> AsyncIterator[RunnerItemT]: log.debug("Tool call was not requested, exiting from tool runner loop.") return - if not self._messages_modified: + # Auto-append the assistant + tool result unless the caller already + # supplied a matching tool_result via append_messages (the "modifying + # tool results" pattern). Callers who append unrelated messages + # (e.g. 
an extra user instruction) inside the loop body don't match + # this check, so the tool flow stays threaded — see #1536. + if not self._tool_response_already_appended(response): self.append_messages(message, response) self._messages_modified = False diff --git a/tests/lib/tools/test_runners.py b/tests/lib/tools/test_runners.py index 225fd2b5..da60829d 100644 --- a/tests/lib/tools/test_runners.py +++ b/tests/lib/tools/test_runners.py @@ -683,6 +683,63 @@ def _get_weather(location: str, units: Literal["c", "f"]) -> Dict[str, Any]: } +def test_tool_response_already_appended_detects_matching_tool_result(client: Anthropic) -> None: + """Regression test for #1536. + + The runner's auto-append of (assistant, tool_result) should only be skipped when + the caller already supplied a matching tool_result. If the caller appended an + unrelated message (e.g. an extra user instruction), the tool flow must still be + threaded through. + """ + + @beta_tool + def noop() -> BetaFunctionToolResultType: + """A no-op tool used for type plumbing only.""" + return "ok" + + runner = client.beta.messages.tool_runner( + model="claude-haiku-4-5", + messages=[{"role": "user", "content": "hi"}], + tools=[noop], + max_tokens=64, + ) + + matching_response: BetaMessageParam = { + "role": "user", + "content": [ + BetaToolResultBlockParam( + tool_use_id="toolu_TEST", + content="result", + type="tool_result", + ) + ], + } + + # Initial history holds only the plain-text "hi" user message: nothing to match against + assert runner._tool_response_already_appended(matching_response) is False + + # History contains a user message with no tool_result blocks: still no match + runner.append_messages({"role": "user", "content": "be concise in your response"}) + assert runner._tool_response_already_appended(matching_response) is False + + # History now contains a matching tool_result (caller pre-supplied it): match + runner.append_messages(matching_response) + assert runner._tool_response_already_appended(matching_response) is True + + # Response with a
different tool_use_id should not match the existing one + different_response: BetaMessageParam = { + "role": "user", + "content": [ + BetaToolResultBlockParam( + tool_use_id="toolu_OTHER", + content="result", + type="tool_result", + ) + ], + } + assert runner._tool_response_already_appended(different_response) is False + + @pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) def test_tool_runner_method_in_sync(sync: bool, client: Anthropic, async_client: AsyncAnthropic) -> None: checking_client: "Anthropic | AsyncAnthropic" = client if sync else async_client