From d0a7c59f41cd4e0844bf2076ff16e25c3ed9fc96 Mon Sep 17 00:00:00 2001 From: Zelys Date: Wed, 13 May 2026 12:58:27 -0500 Subject: [PATCH] fix(tools): append_messages() inside tool runner loop no longer infinite-loops or drops the assistant turn Fixes #1536. The runner used a _messages_modified boolean that could not distinguish between two distinct append behaviors: a user supplying their own tool_result (skip auto-append) vs. a user appending extra context (auto-append still needed). Both set the flag True, so calling append_messages() inside the loop caused either an infinite loop or a 400 API error depending on what was appended. Replace the flag with a message-count snapshot taken before each yield. After yield returns, slice the list to get what the user appended and branch on whether those messages contain a tool_result block. If yes, insert the assistant turn before the user's result only when they did not include it themselves. If no, generate tool responses as normal and splice the (assistant, tool_result) pair in before any extra messages. Add _set_messages_list helper to BaseToolRunner so the four call sites that replace the full message list can capture the list as a method parameter rather than a loop variable, satisfying both ruff B023 and mypy's callable-type inference. Remove the xfail from test_custom_message_handling and update its HTTP fixture to return a valid 200 response. Add three regression tests covering: plain user message append (no infinite loop), manual tool_result (assistant turn auto-inserted), and user-supplied assistant + tool_result (no duplicate assistant message). --- src/anthropic/lib/tools/_beta_runner.py | 105 ++++++-- .../f59a9391-643b-422c-96dc-1f28bc7ea4d7.json | 54 +++- tests/lib/tools/test_runners.py | 251 +++++++++++++++++- 3 files changed, 378 insertions(+), 32 deletions(-) diff --git a/src/anthropic/lib/tools/_beta_runner.py b/src/anthropic/lib/tools/_beta_runner.py index 52e486988..8adb31cd8 100644 --- a/src/anthropic/lib/tools/_beta_runner.py +++ b/src/anthropic/lib/tools/_beta_runner.py @@ -15,6 +15,7 @@ Iterator, Coroutine, AsyncIterator, + cast, ) from contextlib import contextmanager, asynccontextmanager from typing_extensions import TypedDict, override @@ -55,6 +56,17 @@ log = logging.getLogger(__name__) +def _has_tool_result(messages: List[BetaMessageParam]) -> bool: + """Return True if any message in the list contains a tool_result content block.""" + for msg in messages: + content = msg.get("content", []) + if isinstance(content, list) and any( + isinstance(b, dict) and b.get("type") == "tool_result" for b in content + ): + return True + return False + + class RequestOptions(TypedDict, total=False): extra_headers: Headers | None extra_query: Query | None @@ -85,7 +97,6 @@ def __init__( merged_headers = {**helper_header, **(options.get("extra_headers") or {})} options = {**options, "extra_headers": merged_headers} self._options = options - self._messages_modified = False self._cached_tool_call_response: BetaMessageParam | None = None self._max_iterations = max_iterations self._iteration_count = 0 @@ -116,10 +127,14 @@ def append_messages(self, *messages: BetaMessageParam | ParsedBetaMessage[Respon {"role": message.role, "content": message.content} if isinstance(message, BetaMessage) else message for message in messages ] - self._messages_modified = True self.set_messages_params(lambda params: {**params, "messages": [*params["messages"], *message_params]}) self._cached_tool_call_response = None + def _set_messages_list(self, messages: List[BetaMessageParam]) -> None: + # `messages` is a parameter here (not a loop variable), so lambdas that + # capture it satisfy both ruff B023 and mypy's callable-type inference. + self.set_messages_params(lambda params: {**params, "messages": messages}) + def _should_stop(self) -> bool: if self._max_iterations is not None and self._iteration_count >= self._max_iterations: return True @@ -260,6 +275,7 @@ def _check_and_compact(self) -> bool: def __run__(self) -> Iterator[RunnerItemT]: while not self._should_stop(): with self._handle_request() as item: + pre_yield_message_count = len(cast(List[BetaMessageParam], self._params["messages"])) yield item message = self._get_last_message() assert message is not None @@ -273,15 +289,41 @@ def __run__(self) -> Iterator[RunnerItemT]: # If the compaction was performed, skip tool call generation this iteration if not self._check_and_compact(): - response = self.generate_tool_call_response() - if response is None: - log.debug("Tool call was not requested, exiting from tool runner loop.") - return - - if not self._messages_modified: - self.append_messages(message, response) + all_messages = list(self._params["messages"]) + user_appended = all_messages[pre_yield_message_count:] + + if _has_tool_result(user_appended): + # User provided their own tool result. Ensure the assistant message + # precedes it — insert it only when the user did not include it. + if not any(m.get("role") == "assistant" for m in user_appended): + asst_param: BetaMessageParam = {"role": message.role, "content": message.content} + new_messages: List[BetaMessageParam] = [ + *all_messages[:pre_yield_message_count], + asst_param, + *user_appended, + ] + self._set_messages_list(new_messages) + else: + response = self.generate_tool_call_response() + if response is None: + log.debug("Tool call was not requested, exiting from tool runner loop.") + return + + if user_appended: + # User appended extra (non-tool_result) messages. Insert the + # auto-generated (assistant, tool_result) pair before them so + # message ordering stays valid for the API. + asst_param = {"role": message.role, "content": message.content} + new_messages = [ + *all_messages[:pre_yield_message_count], + asst_param, + response, + *user_appended, + ] + self._set_messages_list(new_messages) + else: + self.append_messages(message, response) - self._messages_modified = False self._cached_tool_call_response = None def until_done(self) -> ParsedBetaMessage[ResponseFormatT]: @@ -541,6 +583,7 @@ async def _check_and_compact(self) -> bool: async def __run__(self) -> AsyncIterator[RunnerItemT]: while not self._should_stop(): async with self._handle_request() as item: + pre_yield_message_count = len(cast(List[BetaMessageParam], self._params["messages"])) yield item message = await self._get_last_message() assert message is not None @@ -554,15 +597,41 @@ async def __run__(self) -> AsyncIterator[RunnerItemT]: # If the compaction was performed, skip tool call generation this iteration if not await self._check_and_compact(): - response = await self.generate_tool_call_response() - if response is None: - log.debug("Tool call was not requested, exiting from tool runner loop.") - return - - if not self._messages_modified: - self.append_messages(message, response) + all_messages = list(self._params["messages"]) + user_appended = all_messages[pre_yield_message_count:] + + if _has_tool_result(user_appended): + # User provided their own tool result. Ensure the assistant message + # precedes it — insert it only when the user did not include it. + if not any(m.get("role") == "assistant" for m in user_appended): + asst_param: BetaMessageParam = {"role": message.role, "content": message.content} + new_messages: List[BetaMessageParam] = [ + *all_messages[:pre_yield_message_count], + asst_param, + *user_appended, + ] + self._set_messages_list(new_messages) + else: + response = await self.generate_tool_call_response() + if response is None: + log.debug("Tool call was not requested, exiting from tool runner loop.") + return + + if user_appended: + # User appended extra (non-tool_result) messages. Insert the + # auto-generated (assistant, tool_result) pair before them so + # message ordering stays valid for the API. + asst_param = {"role": message.role, "content": message.content} + new_messages = [ + *all_messages[:pre_yield_message_count], + asst_param, + response, + *user_appended, + ] + self._set_messages_list(new_messages) + else: + self.append_messages(message, response) - self._messages_modified = False self._cached_tool_call_response = None async def until_done(self) -> ParsedBetaMessage[ResponseFormatT]: diff --git a/tests/lib/tools/__inline_snapshot__/test_runners/TestSyncRunTools/f59a9391-643b-422c-96dc-1f28bc7ea4d7.json b/tests/lib/tools/__inline_snapshot__/test_runners/TestSyncRunTools/f59a9391-643b-422c-96dc-1f28bc7ea4d7.json index 804a5a7c7..de80c1845 100644 --- a/tests/lib/tools/__inline_snapshot__/test_runners/TestSyncRunTools/f59a9391-643b-422c-96dc-1f28bc7ea4d7.json +++ b/tests/lib/tools/__inline_snapshot__/test_runners/TestSyncRunTools/f59a9391-643b-422c-96dc-1f28bc7ea4d7.json @@ -137,7 +137,7 @@ "anthropic-beta": "structured-outputs-2025-12-15", "x-stainless-retry-count": "0", "x-stainless-read-timeout": "600", - "content-length": "789" + "content-length": "598" }, "body": { "max_tokens": 1024, @@ -146,12 +146,29 @@ "role": "user", "content": "What's the weather in SF in Celsius?" }, + { + "role": "assistant", + "content": [ + { + "type": "tool_use", + "id": "toolu_01GHndag5wQmbzNihYmV2UBj", + "name": "get_weather", + "input": { + "location": "San Francisco, CA", + "units": "c" + }, + "caller": { + "type": "direct" + } + } + ] + }, { "role": "user", "content": [ { "tool_use_id": "toolu_01GHndag5wQmbzNihYmV2UBj", - "content": "The weather in San Francisco, CA is currently sunny with a temperature of 20°C.", + "content": "The weather in San Francisco, CA is currently sunny with a temperature of 20\u00b0C.", "type": "tool_result" } ] @@ -191,12 +208,10 @@ } }, "response": { - "status_code": 400, + "status_code": 200, "headers": { "content-type": "application/json", - "content-length": "316", "connection": "keep-alive", - "x-should-retry": "false", "strict-transport-security": "max-age=31536000; includeSubDomains; preload", "server": "cloudflare", "cf-cache-status": "DYNAMIC", @@ -204,12 +219,29 @@ "content-security-policy": "default-src 'none'; frame-ancestors 'none'" }, "body": { - "type": "error", - "error": { - "type": "invalid_request_error", - "message": "messages.0.content.1: unexpected `tool_use_id` found in `tool_result` blocks: toolu_01GHndag5wQmbzNihYmV2UBj. Each `tool_result` block must have a corresponding `tool_use` block in the previous message." - }, - "request_id": "req_011CYHyk9NPsBYeGbC9LuDNK" + "model": "claude-haiku-4-5-20251001", + "id": "msg_01DSPL7PHKQYTe9VAFkHzsA3", + "type": "message", + "role": "assistant", + "content": [ + { + "type": "text", + "text": "The weather in San Francisco, CA is currently **20\u00b0C** and **Sunny**. Nice weather!" + } + ], + "stop_reason": "end_turn", + "stop_sequence": null, + "usage": { + "input_tokens": 787, + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 0, + "cache_creation": { + "ephemeral_5m_input_tokens": 0, + "ephemeral_1h_input_tokens": 0 + }, + "output_tokens": 26, + "service_tier": "standard" + } } } } diff --git a/tests/lib/tools/test_runners.py b/tests/lib/tools/test_runners.py index 6b92e7a9b..8c44cb553 100644 --- a/tests/lib/tools/test_runners.py +++ b/tests/lib/tools/test_runners.py @@ -1,6 +1,7 @@ import json import logging from typing import Any, Dict, List, Union, cast +from unittest.mock import NonCallableMagicMock, patch from typing_extensions import Literal import pytest @@ -73,7 +74,39 @@ ] ), "result": snapshot( - "ParsedBetaMessage(container=None, content=[ParsedBetaTextBlock(citations=None, parsed_output=None, text='The weather in San Francisco, CA is currently **20°C** and **Sunny**. Nice weather!', type='text')], context_management=None, id='msg_01DSPL7PHKQYTe9VAFkHzsA3', model='claude-haiku-4-5-20251001', role='assistant', stop_details=None, stop_reason='end_turn', stop_sequence=None, type='message', usage=BetaUsage(cache_creation=BetaCacheCreation(ephemeral_1h_input_tokens=0, ephemeral_5m_input_tokens=0), cache_creation_input_tokens=0, cache_read_input_tokens=0, inference_geo=None, input_tokens=787, iterations=None, output_tokens=26, server_tool_use=None, service_tier='standard', speed=None))\n" + """\ +ParsedBetaMessage( + container=None, + content=[ + ParsedBetaTextBlock( + citations=None, + parsed_output=None, + text='The weather in San Francisco, CA is currently **20°C** and **Sunny**. Nice weather!', + type='text' + ) + ], + context_management=None, + id='msg_01DSPL7PHKQYTe9VAFkHzsA3', + model='claude-haiku-4-5-20251001', + role='assistant', + stop_details=None, + stop_reason='end_turn', + stop_sequence=None, + type='message', + usage=BetaUsage( + cache_creation=BetaCacheCreation(ephemeral_1h_input_tokens=0, ephemeral_5m_input_tokens=0), + cache_creation_input_tokens=0, + cache_read_input_tokens=0, + inference_geo=None, + input_tokens=787, + iterations=None, + output_tokens=26, + server_tool_use=None, + service_tier='standard', + speed=None + ) +) +""" ), }, "streaming": { @@ -239,8 +272,6 @@ def get_weather(location: str, units: Literal["c", "f"]) -> BetaFunctionToolResu cast(Any, external("uuid:f59a9391-643b-422c-96dc-1f28bc7ea4d7.json")), ], ) - # TODO: fix the append_messages method - @pytest.mark.xfail(reason="bug in append messages") def test_custom_message_handling(self, snapshot_client: Anthropic) -> None: @beta_tool def get_weather(location: str, units: Literal["c", "f"]) -> BetaFunctionToolResultType: @@ -280,6 +311,220 @@ def get_weather(location: str, units: Literal["c", "f"]) -> BetaFunctionToolResu assert print_obj(message) == snapshots["custom"]["result"] + def test_append_extra_message_does_not_cause_infinite_loop(self) -> None: + """append_messages() with a non-tool_result message must not skip the + auto-generated (assistant, tool_result) pair (regression for issue #1536).""" + + @beta_tool + def get_weather(location: str) -> BetaFunctionToolResultType: + """Get the weather for a location.""" + return f"Sunny in {location}" + + tool_use_block = NonCallableMagicMock() + tool_use_block.type = "tool_use" + tool_use_block.id = "toolu_unit_001" + tool_use_block.name = "get_weather" + tool_use_block.input = {"location": "SF"} + + tool_use_msg = NonCallableMagicMock() + tool_use_msg.role = "assistant" + tool_use_msg.stop_reason = "tool_use" + tool_use_msg.container = None + tool_use_msg.content = [tool_use_block] + + text_block = NonCallableMagicMock() + text_block.type = "text" + text_block.text = "The weather is sunny." + + end_turn_msg = NonCallableMagicMock() + end_turn_msg.role = "assistant" + end_turn_msg.stop_reason = "end_turn" + end_turn_msg.container = None + end_turn_msg.content = [text_block] + + call_count = 0 + parse_calls: List[Any] = [] + + def mock_parse(**kwargs: Any) -> Any: + nonlocal call_count + call_count += 1 + parse_calls.append(kwargs) + return tool_use_msg if call_count == 1 else end_turn_msg + + client = Anthropic(api_key="test-key", base_url="http://127.0.0.1:1") + runner = client.beta.messages.tool_runner( + model="claude-haiku-4-5", + max_tokens=1024, + tools=[get_weather], + messages=[{"role": "user", "content": "What's the weather in SF?"}], + ) + + with patch.object(client.beta.messages, "parse", side_effect=mock_parse): + for _ in runner: + runner.append_messages({"role": "user", "content": "Remember: be concise."}) + + assert call_count == 2, "Expected exactly 2 API calls, not an infinite loop" + + second_messages: List[Any] = parse_calls[1]["messages"] + roles = [m["role"] for m in second_messages] + assert roles == ["user", "assistant", "user", "user"], ( + "Expected [initial_user, asst(tool_use), user(tool_result), user(extra)]" + ) + tool_result_content = second_messages[2].get("content", []) + assert isinstance(tool_result_content, list) and any( + isinstance(b, dict) and b.get("type") == "tool_result" for b in tool_result_content + ), "Third message must be the auto-generated tool_result" + assert second_messages[3].get("content") == "Remember: be concise.", ( + "Extra user message must come after the tool_result" + ) + + def test_append_manual_tool_result_prepends_assistant_message(self) -> None: + """When the user provides a tool_result manually, the runner must insert + the assistant message before it (regression for issue #1536).""" + + @beta_tool + def get_weather(location: str) -> BetaFunctionToolResultType: + """Get the weather for a location.""" + return f"Sunny in {location}" + + tool_use_block = NonCallableMagicMock() + tool_use_block.type = "tool_use" + tool_use_block.id = "toolu_unit_002" + tool_use_block.name = "get_weather" + tool_use_block.input = {"location": "SF"} + + tool_use_msg = NonCallableMagicMock() + tool_use_msg.role = "assistant" + tool_use_msg.stop_reason = "tool_use" + tool_use_msg.container = None + tool_use_msg.content = [tool_use_block] + + text_block = NonCallableMagicMock() + text_block.type = "text" + text_block.text = "The weather is sunny." + + end_turn_msg = NonCallableMagicMock() + end_turn_msg.role = "assistant" + end_turn_msg.stop_reason = "end_turn" + end_turn_msg.container = None + end_turn_msg.content = [text_block] + + call_count = 0 + parse_calls: List[Any] = [] + + def mock_parse(**kwargs: Any) -> Any: + nonlocal call_count + call_count += 1 + parse_calls.append(kwargs) + return tool_use_msg if call_count == 1 else end_turn_msg + + client = Anthropic(api_key="test-key", base_url="http://127.0.0.1:1") + runner = client.beta.messages.tool_runner( + model="claude-haiku-4-5", + max_tokens=1024, + tools=[get_weather], + messages=[{"role": "user", "content": "What's the weather in SF?"}], + ) + + with patch.object(client.beta.messages, "parse", side_effect=mock_parse): + for msg in runner: + if msg.stop_reason == "tool_use": + runner.append_messages( + BetaMessageParam( + role="user", + content=[ + BetaToolResultBlockParam( + tool_use_id=msg.content[0].id, + content="20°C and sunny", + type="tool_result", + ) + ], + ) + ) + + assert call_count == 2, "Expected exactly 2 API calls" + + second_messages: List[Any] = parse_calls[1]["messages"] + assert len(second_messages) == 3, "Expected [initial_user, asst(tool_use), user(tool_result)]" + assert second_messages[1]["role"] == "assistant", "Assistant message must be auto-inserted before tool_result" + tool_result_content = second_messages[2].get("content", []) + assert isinstance(tool_result_content, list) and any( + isinstance(b, dict) and b.get("type") == "tool_result" for b in tool_result_content + ), "Third message must contain the tool_result block" + + def test_manual_tool_result_with_assistant_not_double_inserted(self) -> None: + """When the user provides both the assistant turn AND tool_result themselves, + the runner must not insert a second assistant message (regression guard for issue #1536).""" + + @beta_tool + def get_weather(location: str) -> BetaFunctionToolResultType: + """Get the weather for a location.""" + return f"Sunny in {location}" + + tool_use_block = NonCallableMagicMock() + tool_use_block.type = "tool_use" + tool_use_block.id = "toolu_unit_003" + tool_use_block.name = "get_weather" + tool_use_block.input = {"location": "SF"} + + tool_use_msg = NonCallableMagicMock() + tool_use_msg.role = "assistant" + tool_use_msg.stop_reason = "tool_use" + tool_use_msg.container = None + tool_use_msg.content = [tool_use_block] + + text_block = NonCallableMagicMock() + text_block.type = "text" + text_block.text = "The weather is sunny." + + end_turn_msg = NonCallableMagicMock() + end_turn_msg.role = "assistant" + end_turn_msg.stop_reason = "end_turn" + end_turn_msg.container = None + end_turn_msg.content = [text_block] + + call_count = 0 + parse_calls: List[Any] = [] + + def mock_parse(**kwargs: Any) -> Any: + nonlocal call_count + call_count += 1 + parse_calls.append(kwargs) + return tool_use_msg if call_count == 1 else end_turn_msg + + client = Anthropic(api_key="test-key", base_url="http://127.0.0.1:1") + runner = client.beta.messages.tool_runner( + model="claude-haiku-4-5", + max_tokens=1024, + tools=[get_weather], + messages=[{"role": "user", "content": "What's the weather in SF?"}], + ) + + with patch.object(client.beta.messages, "parse", side_effect=mock_parse): + for msg in runner: + if msg.stop_reason == "tool_use": + # User manually builds the full (assistant, tool_result) pair. + runner.append_messages( + BetaMessageParam(role="assistant", content=msg.content), + BetaMessageParam( + role="user", + content=[ + BetaToolResultBlockParam( + tool_use_id=msg.content[0].id, + content="20°C and sunny", + type="tool_result", + ) + ], + ), + ) + + assert call_count == 2, "Expected exactly 2 API calls" + + second_messages: List[Any] = parse_calls[1]["messages"] + assert len(second_messages) == 3, "Expected [initial_user, asst(tool_use), user(tool_result)] — no duplicates" + roles = [m["role"] for m in second_messages] + assert roles == ["user", "assistant", "user"], "Runner must not double-insert the assistant message" + @pytest.mark.parametrize( "http_snapshot", [