From 80a77198b58481d03efae74181113ac29164a6d6 Mon Sep 17 00:00:00 2001 From: kuccello Date: Sat, 7 Feb 2026 07:27:37 -0800 Subject: [PATCH] feat: add XML tool call parsing fallback for Qwen3-coder via Ollama When using Qwen3-coder model through Ollama with many tools (6+), the model outputs XML-style tool calls in the content field instead of using the native JSON tool_calls format. This causes Goose to fail to execute any tools. This commit adds XML parsing as a fallback mechanism: - Create formats/ollama.rs with XML tool call parsing logic - Add parse_xml_tool_calls() to parse value format - Wrap standard OpenAI response parsing with XML fallback for Ollama - Add response_to_streaming_message_ollama() that buffers text when XML markers detected - Update ollama.rs to use Ollama-specific format module and streaming - Add comprehensive unit tests for XML parsing The fix is backward-compatible and only activates when JSON tool_calls are absent, ensuring existing providers continue to work normally. The XML parsing is isolated to the Ollama provider only. Tested with Qwen3-coder:latest via Ollama with 11 developer tools. Signed-off-by: kuccello --- crates/goose/src/providers/formats/mod.rs | 1 + crates/goose/src/providers/formats/ollama.rs | 422 +++++++++++++++++++ crates/goose/src/providers/ollama.rs | 42 +- 3 files changed, 459 insertions(+), 6 deletions(-) create mode 100644 crates/goose/src/providers/formats/ollama.rs diff --git a/crates/goose/src/providers/formats/mod.rs b/crates/goose/src/providers/formats/mod.rs index d28f06a54208..d70169046a8c 100644 --- a/crates/goose/src/providers/formats/mod.rs +++ b/crates/goose/src/providers/formats/mod.rs @@ -3,6 +3,7 @@ pub mod bedrock; pub mod databricks; pub mod gcpvertexai; pub mod google; +pub mod ollama; pub mod openai; pub mod openai_responses; pub mod openrouter; diff --git a/crates/goose/src/providers/formats/ollama.rs b/crates/goose/src/providers/formats/ollama.rs new file mode 100644 index 000000000000..5c4c124fdb9e --- /dev/null +++ b/crates/goose/src/providers/formats/ollama.rs @@ -0,0 +1,422 @@ +//! Ollama-specific response handling with XML tool call fallback. +//! +//! Some models running through Ollama (notably Qwen3-coder) output XML-style tool calls +//! when given many tools (6+), instead of using the native JSON tool_calls format. +//! This module wraps the standard OpenAI response parsing with XML fallback logic, +//! isolating this behavior to the Ollama provider only. +//! +//! Known affected models: +//! - qwen3-coder +//! - qwen3-coder-32b + +use crate::conversation::message::{Message, MessageContent}; +use crate::providers::base::ProviderUsage; +use crate::providers::utils::is_valid_function_name; +use async_stream::try_stream; +use chrono; +use futures::Stream; +use regex::Regex; +use rmcp::model::{object, CallToolRequestParams, ErrorCode, ErrorData, Role}; +use serde_json::Value; +use std::borrow::Cow; +use uuid::Uuid; + +pub use super::openai::{ + create_request, format_messages, format_tools, get_usage, validate_tool_schemas, +}; + +/// Parse XML-style tool calls from content (Ollama/Qwen3-coder fallback format). +/// +/// Format: `value...` +/// +/// Returns a tuple of (prefix_text, tool_calls) where prefix_text is any text before the first function tag. +pub fn parse_xml_tool_calls(content: &str) -> (Option, Vec) { + let mut tool_calls = Vec::new(); + + let function_re = Regex::new(r"]+)>([\s\S]*?)").unwrap(); + let param_re = Regex::new(r"]+)>([\s\S]*?)").unwrap(); + + let prefix = content + .find(" anyhow::Result { + let message = super::openai::response_to_message(response)?; + + let has_tool_requests = message + .content + .iter() + .any(|c| matches!(c, MessageContent::ToolRequest(_))); + + if has_tool_requests { + return Ok(message); + } + + let original = response + .get("choices") + .and_then(|c| c.get(0)) + .and_then(|m| m.get("message")); + + if let Some(original) = original { + if let Some(text) = original.get("content").and_then(|c| c.as_str()) { + if text.contains(" String { + message + .content + .iter() + .filter_map(|c| { + if let MessageContent::Text(text) = c { + Some(text.text.as_str()) + } else { + None + } + }) + .collect::>() + .join("") +} + +/// Check if a message contains only text content (no tool requests/responses). +fn is_text_only_message(message: &Message) -> bool { + message + .content + .iter() + .all(|c| matches!(c, MessageContent::Text(_))) +} + +/// Streaming message handler with XML tool call post-processing for Ollama. +/// +/// This wraps the standard OpenAI streaming handler and post-processes messages +/// to detect and parse XML tool calls. When XML markers are detected in text +/// messages, it buffers them until the stream completes, then parses and emits +/// the tool calls. +/// +/// This approach avoids exposing any internal types from openai.rs. +pub fn response_to_streaming_message_ollama( + stream: S, +) -> impl Stream, Option)>> + 'static +where + S: Stream> + Unpin + Send + 'static, +{ + try_stream! { + use futures::StreamExt; + + let base_stream = super::openai::response_to_streaming_message(stream); + let mut base_stream = std::pin::pin!(base_stream); + + let mut accumulated_text = String::new(); + let mut xml_detected = false; + let mut last_usage: Option = None; + + while let Some(result) = base_stream.next().await { + let (message_opt, usage) = result?; + + if usage.is_some() { + last_usage = usage.clone(); + } + + if let Some(message) = message_opt { + if is_text_only_message(&message) { + let text = extract_text_from_message(&message); + accumulated_text.push_str(&text); + + if !xml_detected && accumulated_text.contains(" +write +/tmp/test.txt +hello world +"#; + + let (prefix, tool_calls) = parse_xml_tool_calls(content); + + assert!(prefix.is_none(), "Should have no prefix"); + assert_eq!(tool_calls.len(), 1, "Should have 1 tool call"); + + if let MessageContent::ToolRequest(request) = &tool_calls[0] { + let tool_call = request.tool_call.as_ref().unwrap(); + assert_eq!(tool_call.name, "developer__text_editor"); + let args = tool_call.arguments.as_ref().unwrap(); + assert_eq!(args.get("command").unwrap(), "write"); + assert_eq!(args.get("path").unwrap(), "/tmp/test.txt"); + assert_eq!(args.get("file_text").unwrap(), "hello world"); + } else { + panic!("Expected ToolRequest content"); + } + } + + #[test] + fn test_parse_xml_tool_calls_with_prefix() { + let content = r#"I'll create the file for you. + + +write +/tmp/test.txt +"#; + + let (prefix, tool_calls) = parse_xml_tool_calls(content); + + assert_eq!( + prefix, + Some("I'll create the file for you.".to_string()), + "Should have prefix text" + ); + assert_eq!(tool_calls.len(), 1, "Should have 1 tool call"); + } + + #[test] + fn test_parse_xml_tool_calls_multiple() { + let content = r#" +ls -la + + +view +/tmp/test.txt +"#; + + let (prefix, tool_calls) = parse_xml_tool_calls(content); + + assert!(prefix.is_none()); + assert_eq!(tool_calls.len(), 2, "Should have 2 tool calls"); + + if let MessageContent::ToolRequest(request) = &tool_calls[0] { + let tool_call = request.tool_call.as_ref().unwrap(); + assert_eq!(tool_call.name, "developer__shell"); + } else { + panic!("Expected ToolRequest content"); + } + + if let MessageContent::ToolRequest(request) = &tool_calls[1] { + let tool_call = request.tool_call.as_ref().unwrap(); + assert_eq!(tool_call.name, "developer__text_editor"); + } else { + panic!("Expected ToolRequest content"); + } + } + + #[test] + fn test_parse_xml_tool_calls_no_match() { + let content = "This is just regular text without any tool calls."; + + let (prefix, tool_calls) = parse_xml_tool_calls(content); + + assert!(prefix.is_none()); + assert!(tool_calls.is_empty(), "Should have no tool calls"); + } + + #[test] + fn test_parse_xml_tool_calls_qwen_format() { + // Test the exact format observed from Qwen3-coder via Ollama + let content = r#"I'll create a file at /tmp/hello.txt with the content "hello". + + + +write + + +/tmp/hello.txt + + +hello + + +"#; + + let (prefix, tool_calls) = parse_xml_tool_calls(content); + + assert!(prefix.is_some(), "Should have prefix"); + assert_eq!(tool_calls.len(), 1, "Should have 1 tool call"); + + if let MessageContent::ToolRequest(request) = &tool_calls[0] { + let tool_call = request.tool_call.as_ref().unwrap(); + assert_eq!(tool_call.name, "developer__text_editor"); + let args = tool_call.arguments.as_ref().unwrap(); + assert_eq!(args.get("command").unwrap(), "write"); + assert_eq!(args.get("path").unwrap(), "/tmp/hello.txt"); + assert_eq!(args.get("file_text").unwrap(), "hello"); + } else { + panic!("Expected ToolRequest content"); + } + } + + #[test] + fn test_response_to_message_xml_fallback() -> anyhow::Result<()> { + // Test that response_to_message falls back to XML parsing when no JSON tool_calls + let response = json!({ + "choices": [{ + "message": { + "role": "assistant", + "content": "\nls\n" + } + }] + }); + + let message = response_to_message(&response)?; + + assert_eq!(message.content.len(), 1); + if let MessageContent::ToolRequest(request) = &message.content[0] { + let tool_call = request.tool_call.as_ref().unwrap(); + assert_eq!(tool_call.name, "developer__shell"); + } else { + panic!("Expected ToolRequest content from XML parsing"); + } + + Ok(()) + } + + #[test] + fn test_response_to_message_prefers_json_over_xml() -> anyhow::Result<()> { + // Test that JSON tool_calls take precedence over XML in content + let response = json!({ + "choices": [{ + "message": { + "role": "assistant", + "content": "\ny\n", + "tool_calls": [{ + "id": "call_123", + "function": { + "name": "correct_tool", + "arguments": "{\"a\": \"b\"}" + } + }] + } + }] + }); + + let message = response_to_message(&response)?; + + // Should have both text (from content) and tool request (from tool_calls) + // The XML in content should NOT be parsed since we have JSON tool_calls + let tool_requests: Vec<_> = message + .content + .iter() + .filter(|c| matches!(c, MessageContent::ToolRequest(_))) + .collect(); + + assert_eq!(tool_requests.len(), 1); + if let MessageContent::ToolRequest(request) = tool_requests[0] { + let tool_call = request.tool_call.as_ref().unwrap(); + assert_eq!(tool_call.name, "correct_tool"); + } else { + panic!("Expected ToolRequest"); + } + + Ok(()) + } +} diff --git a/crates/goose/src/providers/ollama.rs b/crates/goose/src/providers/ollama.rs index 353e84b1dc1f..12f06eb67708 100644 --- a/crates/goose/src/providers/ollama.rs +++ b/crates/goose/src/providers/ollama.rs @@ -3,9 +3,7 @@ use super::base::{ ConfigKey, MessageStream, Provider, ProviderDef, ProviderMetadata, ProviderUsage, Usage, }; use super::errors::ProviderError; -use super::openai_compatible::{ - handle_response_openai_compat, handle_status_openai_compat, stream_openai_compat, -}; +use super::openai_compatible::{handle_response_openai_compat, handle_status_openai_compat}; use super::retry::ProviderRetry; use super::utils::{get_model, ImageFormat, RequestLog}; use crate::config::declarative_providers::DeclarativeProviderConfig; @@ -13,15 +11,24 @@ use crate::config::GooseMode; use crate::conversation::message::Message; use crate::conversation::Conversation; use crate::model::ModelConfig; -use crate::providers::formats::openai::{create_request, get_usage, response_to_message}; +use crate::providers::formats::ollama::{ + create_request, get_usage, response_to_message, response_to_streaming_message_ollama, +}; use crate::utils::safe_truncate; -use anyhow::Result; +use anyhow::{Error, Result}; +use async_stream::try_stream; use async_trait::async_trait; use futures::future::BoxFuture; +use futures::TryStreamExt; use regex::Regex; +use reqwest::Response; use rmcp::model::Tool; use serde_json::Value; use std::time::Duration; +use tokio::pin; +use tokio_stream::StreamExt; +use tokio_util::codec::{FramedRead, LinesCodec}; +use tokio_util::io::StreamReader; use url::Url; const OLLAMA_PROVIDER_NAME: &str = "ollama"; @@ -297,7 +304,7 @@ impl Provider for OllamaProvider { .inspect_err(|e| { let _ = log.error(e); })?; - stream_openai_compat(response, log) + stream_ollama(response, log) } async fn fetch_supported_models(&self) -> Result>, ProviderError> { @@ -376,3 +383,26 @@ impl OllamaProvider { filtered } } + +/// Ollama-specific streaming handler with XML tool call fallback. +/// Uses the Ollama format module which buffers text when XML tool calls are detected, +/// preventing duplicate content from being emitted to the UI. +fn stream_ollama(response: Response, mut log: RequestLog) -> Result { + let stream = response.bytes_stream().map_err(std::io::Error::other); + + Ok(Box::pin(try_stream! { + let stream_reader = StreamReader::new(stream); + let framed = FramedRead::new(stream_reader, LinesCodec::new()) + .map_err(Error::from); + + let message_stream = response_to_streaming_message_ollama(framed); + pin!(message_stream); + while let Some(message) = message_stream.next().await { + let (message, usage) = message.map_err(|e| + ProviderError::RequestFailed(format!("Stream decode error: {}", e)) + )?; + log.write(&message, usage.as_ref().map(|f| f.usage).as_ref())?; + yield (message, usage); + } + })) +}