microsoft · LEDazzio01 · Mar 2, 2026 · Mar 2, 2026 · Mar 2, 2026 · eavanvalkenburg
diff --git a/python/packages/ollama/agent_framework_ollama/_chat_client.py b/python/packages/ollama/agent_framework_ollama/_chat_client.py
@@ -268,6 +268,13 @@ class OllamaChatOptions(ChatOptions[ResponseModelT], Generic[ResponseModelT], to
 }
 """Maps ChatOptions keys to Ollama model option parameter names."""
 
+# Names of framework-level keyword arguments that must not be forwarded to
+# ollama.AsyncClient.chat(). _inner_get_response drops any kwarg whose name
+# appears here before it calls the Ollama client, for both the streaming and
+# the non-streaming path. Kept as a frozenset so the filter cannot be mutated
+# at runtime; it is only ever used for membership tests.
+_UNSUPPORTED_CHAT_KWARGS: frozenset[str] = frozenset({"allow_multiple_tool_calls"})
+
 OllamaChatOptionsT = TypeVar("OllamaChatOptionsT", bound=TypedDict, default="OllamaChatOptions", covariant=True)  # type: ignore[valid-type]
 
 
@@ -351,6 +358,10 @@ def _inner_get_response(
         stream: bool = False,
         **kwargs: Any,
     ) -> Awaitable[ChatResponse] | ResponseStream[ChatResponseUpdate, ChatResponse]:
+        # Filter out framework-level kwargs that are not supported by
+        # ollama.AsyncClient.chat().
+        filtered_kwargs = {k: v for k, v in kwargs.items() if k not in _UNSUPPORTED_CHAT_KWARGS}
+
         if stream:
             # Streaming mode
             async def _stream() -> AsyncIterable[ChatResponseUpdate]:
@@ -360,7 +371,7 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]:
                     response_object: AsyncIterable[OllamaChatResponse] = await self.client.chat(  # type: ignore[misc]
                         stream=True,
                         **options_dict,
-                        **kwargs,
+                        **filtered_kwargs,
                     )
                 except Exception as ex:
                     raise ChatClientException(f"Ollama streaming chat request failed : {ex}", ex) from ex
@@ -378,7 +389,7 @@ async def _get_response() -> ChatResponse:
                 response: OllamaChatResponse = await self.client.chat(  # type: ignore[misc]
                     stream=False,
                     **options_dict,
-                    **kwargs,
+                    **filtered_kwargs,
                 )
             except Exception as ex:
                 raise ChatClientException(f"Ollama chat request failed : {ex}", ex) from ex
@@ -395,8 +406,9 @@ def _prepare_options(self, messages: Sequence[Message], options: Mapping[str, An
 
             messages = prepend_instructions_to_messages(list(messages), instructions, role="system")
 
-        # Keys to exclude from processing
-        exclude_keys = {"instructions", "tool_choice"}
+        # Keys to exclude from processing — these are either handled separately
+        # or not supported by the Ollama API.
+        exclude_keys = {"instructions", "tool_choice", "allow_multiple_tool_calls"}
 
         # Build run_options and model_options separately
         run_options: dict[str, Any] = {}

diff --git a/python/packages/ollama/tests/test_ollama_chat_client.py b/python/packages/ollama/tests/test_ollama_chat_client.py
@@ -404,6 +404,93 @@ async def test_cmc_with_dict_tool_passthrough(
     assert call_kwargs["tools"] == [{"type": "function", "function": {"name": "custom_tool", "parameters": {}}}]
 
 
+@patch.object(AsyncClient, "chat", new_callable=AsyncMock)
+async def test_cmc_filters_unsupported_kwargs(
+    mock_chat: AsyncMock,
+    ollama_unit_test_env: dict[str, str],
+    chat_history: list[Message],
+    mock_chat_completion_response: OllamaChatResponse,
+) -> None:
+    """Unsupported top-level kwargs never reach ``ollama.AsyncClient.chat()``.
+
+    ``allow_multiple_tool_calls`` is passed as a plain keyword argument (the
+    way HandoffBuilder does) and must be filtered out silently.
+
+    Regression test for https://github.com/microsoft/agent-framework/issues/4402
+    """
+    chat_history.append(Message(text="hello world", role="user"))
+    mock_chat.return_value = mock_chat_completion_response
+
+    client = OllamaChatClient()
+    # The flag travels as a top-level kwarg here, not inside ``options``.
+    await client.get_response(messages=chat_history, allow_multiple_tool_calls=True)
+
+    # Exactly one request must have been issued, and the keyword arguments
+    # it received must not contain the unsupported flag.
+    mock_chat.assert_called_once()
+    forwarded = mock_chat.call_args.kwargs
+    assert "allow_multiple_tool_calls" not in forwarded
+
+
+@patch.object(AsyncClient, "chat", new_callable=AsyncMock)
+async def test_cmc_streaming_filters_unsupported_kwargs(
+    mock_chat: AsyncMock,
+    ollama_unit_test_env: dict[str, str],
+    chat_history: list[Message],
+    mock_streaming_chat_completion_response: AsyncStream[OllamaChatResponse],
+) -> None:
+    """Streaming requests also drop unsupported kwargs before calling Ollama.
+
+    The stream is fully consumed before the mocked ``chat`` call is inspected.
+
+    Regression test for https://github.com/microsoft/agent-framework/issues/4402
+    """
+    chat_history.append(Message(text="hello world", role="user"))
+    mock_chat.return_value = mock_streaming_chat_completion_response
+
+    client = OllamaChatClient()
+    # With stream=True the result is iterated with ``async for``, not awaited.
+    updates = client.get_response(messages=chat_history, stream=True, allow_multiple_tool_calls=True)
+
+    # Drain the stream; every update from the mocked response carries "test".
+    async for update in updates:
+        assert update.text == "test"
+
+    # One request was issued and the unsupported flag was not part of it.
+    mock_chat.assert_called_once()
+    forwarded = mock_chat.call_args.kwargs
+    assert "allow_multiple_tool_calls" not in forwarded
+
+
+@patch.object(AsyncClient, "chat", new_callable=AsyncMock)
+async def test_cmc_filters_unsupported_options(
+    mock_chat: AsyncMock,
+    ollama_unit_test_env: dict[str, str],
+    chat_history: list[Message],
+    mock_chat_completion_response: OllamaChatResponse,
+) -> None:
+    """Unsupported keys inside ``options`` never reach ``ollama.AsyncClient.chat()``.
+
+    Covers the case where the flag arrives through the options dict (e.g.
+    from ``Agent.default_options`` or workflow cloning) rather than as a
+    top-level keyword argument.
+
+    Regression test for https://github.com/microsoft/agent-framework/issues/4402
+    """
+    chat_history.append(Message(text="hello world", role="user"))
+    mock_chat.return_value = mock_chat_completion_response
+
+    client = OllamaChatClient()
+    # The flag is nested in ``options`` instead of being a top-level kwarg.
+    await client.get_response(messages=chat_history, options={"allow_multiple_tool_calls": True})
+
+    # Exactly one request must have been issued, and the keyword arguments
+    # it received must not contain the unsupported flag.
+    mock_chat.assert_called_once()
+    forwarded = mock_chat.call_args.kwargs
+    assert "allow_multiple_tool_calls" not in forwarded
+
+
 @patch.object(AsyncClient, "chat", new_callable=AsyncMock)
 async def test_cmc_with_data_content_type(
     mock_chat: AsyncMock,