diff --git a/src/Netclaw.Daemon.Tests/Configuration/OpenAiCompatibleChatClientTests.cs b/src/Netclaw.Daemon.Tests/Configuration/OpenAiCompatibleChatClientTests.cs
index 0702e68cd..8093c3ade 100644
--- a/src/Netclaw.Daemon.Tests/Configuration/OpenAiCompatibleChatClientTests.cs
+++ b/src/Netclaw.Daemon.Tests/Configuration/OpenAiCompatibleChatClientTests.cs
@@ -202,6 +202,47 @@ await client.GetResponseAsync(
         Assert.Contains("\"role\":\"tool\"", body, StringComparison.Ordinal);
     }
 
+    // Some models answer with finish_reason "stop" and write the tool call into the
+    // message text as <tool_call>/<function=...>/<parameter=...> markup. The client is
+    // expected to turn that markup into a FunctionCallContent, report
+    // ChatFinishReason.ToolCalls and keep only the surrounding prose as text.
+    [Fact]
+    public async Task ExtractsToolCalls_FromTextBasedFormat_WhenModelSkipsStructuredFormat()
+    {
+        var textWithToolCall = """
+            Let me save that for you.
+            <tool_call>
+            <function=store_memory>
+            <parameter=Content>Important note</parameter>
+            <parameter=Domain>project:test</parameter>
+            </function>
+            </tool_call>
+            """;
+
+        var responseJson = $"{{\"id\":\"1\",\"model\":\"test\",\"choices\":[{{\"finish_reason\":\"stop\",\"message\":{{\"role\":\"assistant\",\"content\":{System.Text.Json.JsonSerializer.Serialize(textWithToolCall)}}}}}]}}";
+
+        using var handler = new RecordingHandler(_ => new HttpResponseMessage(HttpStatusCode.OK)
+        {
+            Content = new StringContent(responseJson, Encoding.UTF8, "application/json")
+        });
+        using var httpClient = new HttpClient(handler) { BaseAddress = new Uri("http://localhost:8000") };
+        var endpoint = OpenAiCompatibleEndpoint.FromBaseUrl("http://localhost:8000");
+        var client = new OpenAiCompatibleChatClient(httpClient, endpoint, "test-model");
+
+        var response = await client.GetResponseAsync([new ChatMessage(ChatRole.User, "save this")]);
+
+        var toolCall = Assert.Single(response.Messages[^1].Contents.OfType<FunctionCallContent>());
+        Assert.Equal("store_memory", toolCall.Name);
+        Assert.Equal("Important note", toolCall.Arguments!["Content"]?.ToString());
+        Assert.Equal("project:test", toolCall.Arguments!["Domain"]?.ToString());
+        Assert.Equal(ChatFinishReason.ToolCalls, response.FinishReason);
+
+        var remainingText = response.Messages[^1].Contents.OfType<TextContent>().FirstOrDefault();
+        Assert.NotNull(remainingText);
+        Assert.Contains("Let me save that", remainingText.Text);
+        Assert.DoesNotContain("<tool_call>", remainingText.Text);
+    }
+
     private sealed class RecordingHandler : HttpMessageHandler
     {
         private readonly Func _handler;
diff --git a/src/Netclaw.OpenAICompatible/OpenAiCompatibleChatClient.cs b/src/Netclaw.OpenAICompatible/OpenAiCompatibleChatClient.cs
index 1d1835318..2a5af4c4e 100644
--- a/src/Netclaw.OpenAICompatible/OpenAiCompatibleChatClient.cs
+++ b/src/Netclaw.OpenAICompatible/OpenAiCompatibleChatClient.cs
@@ -54,23 +54,53 @@ public async IAsyncEnumerable<ChatResponseUpdate> GetStreamingResponseAsync(
         await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken);
         using var reader = new StreamReader(stream, Encoding.UTF8);
         var pendingToolCalls = new Dictionary();
+        var accumulatedText = new StringBuilder();
+        var hadStructuredToolCalls = false;
+        ChatResponseUpdate? finalUpdate = null;
 
         while (!cancellationToken.IsCancellationRequested)
         {
             var line = await reader.ReadLineAsync(cancellationToken);
             if (line is null)
-                yield break;
+                break;
 
             if (string.IsNullOrWhiteSpace(line) || !line.StartsWith("data:", StringComparison.Ordinal))
                 continue;
 
             var ssePayload = line[5..].Trim();
             if (ssePayload == "[DONE]")
-                yield break;
+                break;
 
             using var document = JsonDocument.Parse(ssePayload);
             foreach (var update in ParseStreamingUpdates(document.RootElement, pendingToolCalls))
+            {
+                foreach (var tc in update.Contents.OfType<TextContent>())
+                    accumulatedText.Append(tc.Text);
+
+                if (update.Contents.OfType<FunctionCallContent>().Any())
+                    hadStructuredToolCalls = true;
+
+                if (update.FinishReason is not null)
+                    finalUpdate = update;
+
                 yield return update;
+            }
+        }
+
+        // Fallback: if the model stopped without structured tool calls but the text
+        // contains XML-like tool call blocks, emit a synthetic tool call update.
+        if (!hadStructuredToolCalls
+            && finalUpdate?.FinishReason != ChatFinishReason.ToolCalls
+            && accumulatedText.Length > 0)
+        {
+            var textToolCalls = TextToolCallParser.ExtractFromText(accumulatedText.ToString());
+            if (textToolCalls.Count > 0)
+            {
+                yield return new ChatResponseUpdate(ChatRole.Assistant, textToolCalls.Cast<AIContent>().ToList())
+                {
+                    FinishReason = ChatFinishReason.ToolCalls
+                };
+            }
         }
     }
 
@@ -251,6 +281,7 @@ private static ChatResponse ParseChatResponse(JsonElement root)
         var choice = root.GetProperty("choices")[0];
         var message = choice.GetProperty("message");
         var contents = new List<AIContent>();
+        var finishReason = ParseFinishReason(choice);
 
         if (message.TryGetProperty("reasoning_content", out var reasoning)
             && reasoning.ValueKind == JsonValueKind.String)
@@ -264,11 +295,27 @@ private static ChatResponse ParseChatResponse(JsonElement root)
             contents.Add(new TextContent(content.GetString()!));
         }
 
+        // Fallback: extract tool calls from text when model uses XML-like format
+        if (finishReason != ChatFinishReason.ToolCalls)
+        {
+            var textContent = content.ValueKind == JsonValueKind.String ? content.GetString() : null;
+            var textToolCalls = TextToolCallParser.ExtractFromText(textContent);
+            if (textToolCalls.Count > 0)
+            {
+                contents.RemoveAll(c => c is TextContent);
+                var cleaned = TextToolCallParser.StripToolCallText(textContent!);
+                if (!string.IsNullOrWhiteSpace(cleaned))
+                    contents.Add(new TextContent(cleaned));
+                contents.AddRange(textToolCalls);
+                finishReason = ChatFinishReason.ToolCalls;
+            }
+        }
+
         return new ChatResponse(new ChatMessage(ChatRole.Assistant, contents))
         {
             ModelId = root.TryGetProperty("model", out var model) ? model.GetString() : null,
             ResponseId = root.TryGetProperty("id", out var id) ? id.GetString() : null,
-            FinishReason = ParseFinishReason(choice)
+            FinishReason = finishReason
         };
     }
 
diff --git a/src/Netclaw.OpenAICompatible/TextToolCallParser.cs b/src/Netclaw.OpenAICompatible/TextToolCallParser.cs
new file mode 100644
index 000000000..645db0bf1
--- /dev/null
+++ b/src/Netclaw.OpenAICompatible/TextToolCallParser.cs
@@ -0,0 +1,74 @@
+using System.Text.RegularExpressions;
+using Microsoft.Extensions.AI;
+
+namespace Netclaw.OpenAICompatible;
+
+/// <summary>
+/// Extracts structured tool calls from LLM text responses when the model emits
+/// tool calls as text (e.g. Qwen3.5's XML-like format) instead of using the
+/// OpenAI-structured tool_calls response field.
+/// </summary>
+internal static partial class TextToolCallParser
+{
+    /// <summary>
+    /// Attempts to extract tool calls from a text string.
+    /// </summary>
+    /// <param name="text">Text to scan; <see langword="null"/> or whitespace yields an empty list.</param>
+    /// <returns>
+    /// One <see cref="FunctionCallContent"/> per tool call block, in order of appearance,
+    /// or an empty list if no text-based tool calls are found.
+    /// </returns>
+    public static List<FunctionCallContent> ExtractFromText(string? text)
+    {
+        if (string.IsNullOrWhiteSpace(text))
+            return [];
+
+        var results = new List<FunctionCallContent>();
+
+        foreach (Match block in ToolCallBlockRegex().Matches(text))
+        {
+            // Group 1 of the block pattern is already the function name, so the
+            // block does not need to be scanned a second time to find it.
+            var functionName = block.Groups[1].Value;
+            var arguments = new Dictionary<string, object?>();
+
+            // Values are passed through as trimmed strings; a repeated parameter
+            // name keeps its last value.
+            foreach (Match param in ParameterRegex().Matches(block.Value))
+            {
+                arguments[param.Groups[1].Value] = param.Groups[2].Value.Trim();
+            }
+
+            // No call id is parsed from the text, so a fresh one is generated per call.
+            results.Add(new FunctionCallContent(
+                Guid.NewGuid().ToString("N"),
+                functionName,
+                arguments));
+        }
+
+        return results;
+    }
+
+    /// <summary>
+    /// Removes text-based tool call blocks from the text, returning
+    /// the remaining content (if any).
+    /// </summary>
+    /// <param name="text">Text that may contain tool call blocks.</param>
+    /// <returns>The text with every tool call block removed and outer whitespace trimmed.</returns>
+    public static string StripToolCallText(string text)
+    {
+        return ToolCallBlockRegex().Replace(text, "").Trim();
+    }
+
+    // Matches the entire <tool_call>...</tool_call> block; group 1 is the function
+    // name, group 2 the text between the function tags. Singleline lets '.' cross
+    // newlines. RegexOptions.Compiled is omitted because the regex source generator
+    // ignores it.
+    [GeneratedRegex(@"<tool_call>\s*<function=([^>]+)>\s*(.*?)\s*</function>\s*</tool_call>",
+        RegexOptions.Singleline)]
+    private static partial Regex ToolCallBlockRegex();
+
+    // Extracts parameter name and value from <parameter=NAME>VALUE</parameter>
+    [GeneratedRegex(@"<parameter=([^>]+)>(.*?)</parameter>", RegexOptions.Singleline)]
+    private static partial Regex ParameterRegex();
+}