diff --git a/crates/goose/src/providers/formats/mod.rs b/crates/goose/src/providers/formats/mod.rs
index d28f06a54208..d70169046a8c 100644
--- a/crates/goose/src/providers/formats/mod.rs
+++ b/crates/goose/src/providers/formats/mod.rs
@@ -3,6 +3,7 @@ pub mod bedrock;
pub mod databricks;
pub mod gcpvertexai;
pub mod google;
+pub mod ollama;
pub mod openai;
pub mod openai_responses;
pub mod openrouter;
diff --git a/crates/goose/src/providers/formats/ollama.rs b/crates/goose/src/providers/formats/ollama.rs
new file mode 100644
index 000000000000..5c4c124fdb9e
--- /dev/null
+++ b/crates/goose/src/providers/formats/ollama.rs
@@ -0,0 +1,422 @@
+//! Ollama-specific response handling with XML tool call fallback.
+//!
+//! Some models running through Ollama (notably Qwen3-coder) output XML-style tool calls
+//! when given many tools (6+), instead of using the native JSON tool_calls format.
+//! This module wraps the standard OpenAI response parsing with XML fallback logic,
+//! isolating this behavior to the Ollama provider only.
+//!
+//! Known affected models:
+//! - qwen3-coder
+//! - qwen3-coder-32b
+
+use crate::conversation::message::{Message, MessageContent};
+use crate::providers::base::ProviderUsage;
+use crate::providers::utils::is_valid_function_name;
+use async_stream::try_stream;
+use chrono;
+use futures::Stream;
+use regex::Regex;
+use rmcp::model::{object, CallToolRequestParams, ErrorCode, ErrorData, Role};
+use serde_json::Value;
+use std::borrow::Cow;
+use uuid::Uuid;
+
+pub use super::openai::{
+ create_request, format_messages, format_tools, get_usage, validate_tool_schemas,
+};
+
+/// Parse XML-style tool calls from content (Ollama/Qwen3-coder fallback format).
+///
+/// Format: `<function=tool_name><parameter=param_name>value</parameter></function>` (parameters may also appear on their own lines)
+///
+/// Returns a tuple of (prefix_text, tool_calls) where prefix_text is any text before the first function tag.
+pub fn parse_xml_tool_calls(content: &str) -> (Option, Vec) {
+ let mut tool_calls = Vec::new();
+
+ let function_re = Regex::new(r"]+)>([\s\S]*?)").unwrap();
+ let param_re = Regex::new(r"]+)>([\s\S]*?)").unwrap();
+
+ let prefix = content
+ .find(" anyhow::Result {
+ let message = super::openai::response_to_message(response)?;
+
+ let has_tool_requests = message
+ .content
+ .iter()
+ .any(|c| matches!(c, MessageContent::ToolRequest(_)));
+
+ if has_tool_requests {
+ return Ok(message);
+ }
+
+ let original = response
+ .get("choices")
+ .and_then(|c| c.get(0))
+ .and_then(|m| m.get("message"));
+
+ if let Some(original) = original {
+ if let Some(text) = original.get("content").and_then(|c| c.as_str()) {
+ if text.contains(" String {
+ message
+ .content
+ .iter()
+ .filter_map(|c| {
+ if let MessageContent::Text(text) = c {
+ Some(text.text.as_str())
+ } else {
+ None
+ }
+ })
+ .collect::>()
+ .join("")
+}
+
+/// Check if a message contains only text content (no tool requests/responses).
+fn is_text_only_message(message: &Message) -> bool {
+ message
+ .content
+ .iter()
+ .all(|c| matches!(c, MessageContent::Text(_)))
+}
+
+/// Streaming message handler with XML tool call post-processing for Ollama.
+///
+/// This wraps the standard OpenAI streaming handler and post-processes messages
+/// to detect and parse XML tool calls. When XML markers are detected in text
+/// messages, it buffers them until the stream completes, then parses and emits
+/// the tool calls.
+///
+/// This approach avoids exposing any internal types from openai.rs.
+pub fn response_to_streaming_message_ollama(
+ stream: S,
+) -> impl Stream- , Option)>> + 'static
+where
+ S: Stream
- > + Unpin + Send + 'static,
+{
+ try_stream! {
+ use futures::StreamExt;
+
+ let base_stream = super::openai::response_to_streaming_message(stream);
+ let mut base_stream = std::pin::pin!(base_stream);
+
+ let mut accumulated_text = String::new();
+ let mut xml_detected = false;
+ let mut last_usage: Option = None;
+
+ while let Some(result) = base_stream.next().await {
+ let (message_opt, usage) = result?;
+
+ if usage.is_some() {
+ last_usage = usage.clone();
+ }
+
+ if let Some(message) = message_opt {
+ if is_text_only_message(&message) {
+ let text = extract_text_from_message(&message);
+ accumulated_text.push_str(&text);
+
+ if !xml_detected && accumulated_text.contains("
+write
+/tmp/test.txt
+hello world
+"#;
+
+ let (prefix, tool_calls) = parse_xml_tool_calls(content);
+
+ assert!(prefix.is_none(), "Should have no prefix");
+ assert_eq!(tool_calls.len(), 1, "Should have 1 tool call");
+
+ if let MessageContent::ToolRequest(request) = &tool_calls[0] {
+ let tool_call = request.tool_call.as_ref().unwrap();
+ assert_eq!(tool_call.name, "developer__text_editor");
+ let args = tool_call.arguments.as_ref().unwrap();
+ assert_eq!(args.get("command").unwrap(), "write");
+ assert_eq!(args.get("path").unwrap(), "/tmp/test.txt");
+ assert_eq!(args.get("file_text").unwrap(), "hello world");
+ } else {
+ panic!("Expected ToolRequest content");
+ }
+ }
+
+ #[test]
+ fn test_parse_xml_tool_calls_with_prefix() {
+ let content = r#"I'll create the file for you.
+
+
+write
+/tmp/test.txt
+"#;
+
+ let (prefix, tool_calls) = parse_xml_tool_calls(content);
+
+ assert_eq!(
+ prefix,
+ Some("I'll create the file for you.".to_string()),
+ "Should have prefix text"
+ );
+ assert_eq!(tool_calls.len(), 1, "Should have 1 tool call");
+ }
+
+ #[test]
+ fn test_parse_xml_tool_calls_multiple() {
+ let content = r#"
+ls -la
+
+
+view
+/tmp/test.txt
+"#;
+
+ let (prefix, tool_calls) = parse_xml_tool_calls(content);
+
+ assert!(prefix.is_none());
+ assert_eq!(tool_calls.len(), 2, "Should have 2 tool calls");
+
+ if let MessageContent::ToolRequest(request) = &tool_calls[0] {
+ let tool_call = request.tool_call.as_ref().unwrap();
+ assert_eq!(tool_call.name, "developer__shell");
+ } else {
+ panic!("Expected ToolRequest content");
+ }
+
+ if let MessageContent::ToolRequest(request) = &tool_calls[1] {
+ let tool_call = request.tool_call.as_ref().unwrap();
+ assert_eq!(tool_call.name, "developer__text_editor");
+ } else {
+ panic!("Expected ToolRequest content");
+ }
+ }
+
+ #[test]
+ fn test_parse_xml_tool_calls_no_match() {
+ let content = "This is just regular text without any tool calls.";
+
+ let (prefix, tool_calls) = parse_xml_tool_calls(content);
+
+ assert!(prefix.is_none());
+ assert!(tool_calls.is_empty(), "Should have no tool calls");
+ }
+
+ #[test]
+ fn test_parse_xml_tool_calls_qwen_format() {
+ // Test the exact format observed from Qwen3-coder via Ollama
+ let content = r#"I'll create a file at /tmp/hello.txt with the content "hello".
+
+
+
+write
+
+
+/tmp/hello.txt
+
+
+hello
+
+
+"#;
+
+ let (prefix, tool_calls) = parse_xml_tool_calls(content);
+
+ assert!(prefix.is_some(), "Should have prefix");
+ assert_eq!(tool_calls.len(), 1, "Should have 1 tool call");
+
+ if let MessageContent::ToolRequest(request) = &tool_calls[0] {
+ let tool_call = request.tool_call.as_ref().unwrap();
+ assert_eq!(tool_call.name, "developer__text_editor");
+ let args = tool_call.arguments.as_ref().unwrap();
+ assert_eq!(args.get("command").unwrap(), "write");
+ assert_eq!(args.get("path").unwrap(), "/tmp/hello.txt");
+ assert_eq!(args.get("file_text").unwrap(), "hello");
+ } else {
+ panic!("Expected ToolRequest content");
+ }
+ }
+
+ #[test]
+ fn test_response_to_message_xml_fallback() -> anyhow::Result<()> {
+ // Test that response_to_message falls back to XML parsing when no JSON tool_calls
+ let response = json!({
+ "choices": [{
+ "message": {
+ "role": "assistant",
+ "content": "\nls\n"
+ }
+ }]
+ });
+
+ let message = response_to_message(&response)?;
+
+ assert_eq!(message.content.len(), 1);
+ if let MessageContent::ToolRequest(request) = &message.content[0] {
+ let tool_call = request.tool_call.as_ref().unwrap();
+ assert_eq!(tool_call.name, "developer__shell");
+ } else {
+ panic!("Expected ToolRequest content from XML parsing");
+ }
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_response_to_message_prefers_json_over_xml() -> anyhow::Result<()> {
+ // Test that JSON tool_calls take precedence over XML in content
+ let response = json!({
+ "choices": [{
+ "message": {
+ "role": "assistant",
+ "content": "\ny\n",
+ "tool_calls": [{
+ "id": "call_123",
+ "function": {
+ "name": "correct_tool",
+ "arguments": "{\"a\": \"b\"}"
+ }
+ }]
+ }
+ }]
+ });
+
+ let message = response_to_message(&response)?;
+
+ // Should have both text (from content) and tool request (from tool_calls)
+ // The XML in content should NOT be parsed since we have JSON tool_calls
+ let tool_requests: Vec<_> = message
+ .content
+ .iter()
+ .filter(|c| matches!(c, MessageContent::ToolRequest(_)))
+ .collect();
+
+ assert_eq!(tool_requests.len(), 1);
+ if let MessageContent::ToolRequest(request) = tool_requests[0] {
+ let tool_call = request.tool_call.as_ref().unwrap();
+ assert_eq!(tool_call.name, "correct_tool");
+ } else {
+ panic!("Expected ToolRequest");
+ }
+
+ Ok(())
+ }
+}
diff --git a/crates/goose/src/providers/ollama.rs b/crates/goose/src/providers/ollama.rs
index 353e84b1dc1f..12f06eb67708 100644
--- a/crates/goose/src/providers/ollama.rs
+++ b/crates/goose/src/providers/ollama.rs
@@ -3,9 +3,7 @@ use super::base::{
ConfigKey, MessageStream, Provider, ProviderDef, ProviderMetadata, ProviderUsage, Usage,
};
use super::errors::ProviderError;
-use super::openai_compatible::{
- handle_response_openai_compat, handle_status_openai_compat, stream_openai_compat,
-};
+use super::openai_compatible::{handle_response_openai_compat, handle_status_openai_compat};
use super::retry::ProviderRetry;
use super::utils::{get_model, ImageFormat, RequestLog};
use crate::config::declarative_providers::DeclarativeProviderConfig;
@@ -13,15 +11,24 @@ use crate::config::GooseMode;
use crate::conversation::message::Message;
use crate::conversation::Conversation;
use crate::model::ModelConfig;
-use crate::providers::formats::openai::{create_request, get_usage, response_to_message};
+use crate::providers::formats::ollama::{
+ create_request, get_usage, response_to_message, response_to_streaming_message_ollama,
+};
use crate::utils::safe_truncate;
-use anyhow::Result;
+use anyhow::{Error, Result};
+use async_stream::try_stream;
use async_trait::async_trait;
use futures::future::BoxFuture;
+use futures::TryStreamExt;
use regex::Regex;
+use reqwest::Response;
use rmcp::model::Tool;
use serde_json::Value;
use std::time::Duration;
+use tokio::pin;
+use tokio_stream::StreamExt;
+use tokio_util::codec::{FramedRead, LinesCodec};
+use tokio_util::io::StreamReader;
use url::Url;
const OLLAMA_PROVIDER_NAME: &str = "ollama";
@@ -297,7 +304,7 @@ impl Provider for OllamaProvider {
.inspect_err(|e| {
let _ = log.error(e);
})?;
- stream_openai_compat(response, log)
+ stream_ollama(response, log)
}
async fn fetch_supported_models(&self) -> Result