diff --git a/crates/goose/src/providers/formats/openai.rs b/crates/goose/src/providers/formats/openai.rs index fd2dbd6f6608..2c82e79c4f89 100644 --- a/crates/goose/src/providers/formats/openai.rs +++ b/crates/goose/src/providers/formats/openai.rs @@ -9,6 +9,7 @@ use anyhow::{anyhow, Error}; use async_stream::try_stream; use chrono; use futures::Stream; +use regex::Regex; use rmcp::model::{ object, AnnotateAble, CallToolRequestParams, Content, ErrorCode, ErrorData, RawContent, ResourceContents, Role, Tool, @@ -17,6 +18,62 @@ use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; use std::borrow::Cow; use std::ops::Deref; +use uuid::Uuid; + +/// Parse XML-style tool calls from content (Qwen3-coder fallback format when many tools are provided) +/// Format: value... +/// Returns a tuple of (prefix_text, tool_calls) where prefix_text is any text before the first function tag +fn parse_xml_tool_calls(content: &str) -> (Option, Vec) { + let mut tool_calls = Vec::new(); + + let function_re = Regex::new(r"]+)>([\s\S]*?)").unwrap(); + let param_re = Regex::new(r"]+)>([\s\S]*?)").unwrap(); + + let prefix = content + .find(" anyhow::Result { } } + // Fallback: If no JSON tool_calls found, check for XML-style tool calls in content + // This handles models like Qwen3-coder that output XML when given many tools + let has_tool_requests = content + .iter() + .any(|c| matches!(c, MessageContent::ToolRequest(_))); + + if !has_tool_requests { + if let Some(text) = original.get("content").and_then(|c| c.as_str()) { + if text.contains(" = Vec::new(); + // Track accumulated text content for XML tool call detection + let mut accumulated_text: String = String::new(); + let mut last_chunk_id: Option = None; 'outer: while let Some(response) = stream.next().await { if response.as_ref().is_ok_and(|s| s == "data: [DONE]") { @@ -493,6 +575,11 @@ where } } + // Track chunk ID for message construction + if chunk.id.is_some() { + last_chunk_id = chunk.id.clone(); + } + let mut usage = extract_usage_with_output_tokens(&chunk); if chunk.choices.is_empty() { @@ -620,6 +707,38 @@ where ) } else if chunk.choices[0].delta.content.is_some() { let text = chunk.choices[0].delta.content.as_ref().unwrap(); + + accumulated_text.push_str(text); + + let is_final = chunk.choices[0].finish_reason.is_some(); + + if is_final && accumulated_text.contains(" +write +/tmp/test.txt +hello world +"#; + + let (prefix, tool_calls) = parse_xml_tool_calls(content); + + assert!(prefix.is_none(), "Should have no prefix"); + assert_eq!(tool_calls.len(), 1, "Should have 1 tool call"); + + if let MessageContent::ToolRequest(request) = &tool_calls[0] { + let tool_call = request.tool_call.as_ref().unwrap(); + assert_eq!(tool_call.name, "developer__text_editor"); + let args = tool_call.arguments.as_ref().unwrap(); + assert_eq!(args.get("command").unwrap(), "write"); + assert_eq!(args.get("path").unwrap(), "/tmp/test.txt"); + assert_eq!(args.get("file_text").unwrap(), "hello world"); + } else { + panic!("Expected ToolRequest content"); + } + } + + #[test] + fn test_parse_xml_tool_calls_with_prefix() { + let content = r#"I'll create the file for you. + + +write +/tmp/test.txt +"#; + + let (prefix, tool_calls) = parse_xml_tool_calls(content); + + assert_eq!( + prefix, + Some("I'll create the file for you.".to_string()), + "Should have prefix text" + ); + assert_eq!(tool_calls.len(), 1, "Should have 1 tool call"); + } + + #[test] + fn test_parse_xml_tool_calls_multiple() { + let content = r#" +ls -la + + +view +/tmp/test.txt +"#; + + let (prefix, tool_calls) = parse_xml_tool_calls(content); + + assert!(prefix.is_none()); + assert_eq!(tool_calls.len(), 2, "Should have 2 tool calls"); + + if let MessageContent::ToolRequest(request) = &tool_calls[0] { + let tool_call = request.tool_call.as_ref().unwrap(); + assert_eq!(tool_call.name, "developer__shell"); + } else { + panic!("Expected ToolRequest content"); + } + + if let MessageContent::ToolRequest(request) = &tool_calls[1] { + let tool_call = request.tool_call.as_ref().unwrap(); + assert_eq!(tool_call.name, "developer__text_editor"); + } else { + panic!("Expected ToolRequest content"); + } + } + + #[test] + fn test_parse_xml_tool_calls_no_match() { + let content = "This is just regular text without any tool calls."; + + let (prefix, tool_calls) = parse_xml_tool_calls(content); + + assert!(prefix.is_none()); + assert!(tool_calls.is_empty(), "Should have no tool calls"); + } + + #[test] + fn test_parse_xml_tool_calls_qwen_format() { + // Test the exact format observed from Qwen3-coder via Ollama + let content = r#"I'll create a file at /tmp/hello.txt with the content "hello". + + + +write + + +/tmp/hello.txt + + +hello + + +"#; + + let (prefix, tool_calls) = parse_xml_tool_calls(content); + + assert!(prefix.is_some(), "Should have prefix"); + assert_eq!(tool_calls.len(), 1, "Should have 1 tool call"); + + if let MessageContent::ToolRequest(request) = &tool_calls[0] { + let tool_call = request.tool_call.as_ref().unwrap(); + assert_eq!(tool_call.name, "developer__text_editor"); + let args = tool_call.arguments.as_ref().unwrap(); + assert_eq!(args.get("command").unwrap(), "write"); + assert_eq!(args.get("path").unwrap(), "/tmp/hello.txt"); + assert_eq!(args.get("file_text").unwrap(), "hello"); + } else { + panic!("Expected ToolRequest content"); + } + } + + #[test] + fn test_response_to_message_xml_fallback() -> anyhow::Result<()> { + // Test that response_to_message falls back to XML parsing when no JSON tool_calls + let response = json!({ + "choices": [{ + "message": { + "role": "assistant", + "content": "\nls\n" + } + }] + }); + + let message = response_to_message(&response)?; + + assert_eq!(message.content.len(), 1); + if let MessageContent::ToolRequest(request) = &message.content[0] { + let tool_call = request.tool_call.as_ref().unwrap(); + assert_eq!(tool_call.name, "developer__shell"); + } else { + panic!("Expected ToolRequest content from XML parsing"); + } + + Ok(()) + } + + #[test] + fn test_response_to_message_prefers_json_over_xml() -> anyhow::Result<()> { + // Test that JSON tool_calls take precedence over XML in content + let response = json!({ + "choices": [{ + "message": { + "role": "assistant", + "content": "\ny\n", + "tool_calls": [{ + "id": "call_123", + "function": { + "name": "correct_tool", + "arguments": "{\"a\": \"b\"}" + } + }] + } + }] + }); + + let message = response_to_message(&response)?; + + // Should have both text (from content) and tool request (from tool_calls) + // The XML in content should NOT be parsed since we have JSON tool_calls + let tool_requests: Vec<_> = message + .content + .iter() + .filter(|c| matches!(c, MessageContent::ToolRequest(_))) + .collect(); + + assert_eq!(tool_requests.len(), 1); + if let MessageContent::ToolRequest(request) = tool_requests[0] { + let tool_call = request.tool_call.as_ref().unwrap(); + assert_eq!(tool_call.name, "correct_tool"); + } else { + panic!("Expected ToolRequest"); + } + + Ok(()) + } }