From 6ec79686f114e601f833d6e2722092c5c1925e67 Mon Sep 17 00:00:00 2001 From: Dev-iL <6509619+Dev-iL@users.noreply.github.com> Date: Sun, 31 May 2026 16:35:41 -0400 Subject: [PATCH 1/2] fix(chat): degrade bare-bracket parse failures to content instead of 500 --- common/chat.cpp | 25 +++++++++++++++++++++++++ tests/test-chat.cpp | 18 ++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/common/chat.cpp b/common/chat.cpp index ef151691c382..e30febb67644 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -2596,6 +2596,31 @@ common_chat_msg common_chat_peg_parse(const common_peg_arena & src_pars fprintf(stderr, "\nAST for partial parse (fail):\n%s\n", ctx.ast.dump().c_str()); fflush(stderr); } + // If the partial AST recovered nothing usable, degrade to plain content (see + // the non-partial branch below for the rationale). This is essential for + // streaming: a bare-bracket format optimistically enters its tool-call branch + // at the first '[', and once the branch fails the AST drops the leading + // content. Without this fallback the streamed content would retract to empty + // and then the final non-partial parse would re-emit the whole text, so the + // client sees the pre-'[' prefix duplicated. Returning the input keeps content + // monotonic across the stream. Genuine partial tool calls populate tool_calls + // (or content) and are left untouched. + if (msg.content.empty() && msg.reasoning_content.empty() && msg.tool_calls.empty()) { + msg.content = input; + } + return msg; + } + // Final (non-partial) parse failure: degrade to plain content instead of aborting + // the request. Native bare-bracket formats (e.g. LFM2.5) optimistically treat any + // '[' as the start of a tool call, so ordinary assistant content that merely + // contains '[' (a list, an index, a type hint) would otherwise throw here and + // surface to the caller as an HTTP 500. 
Returning the generated text as content + // keeps the response alive; a genuinely intended-but-malformed tool call likewise + // degrades to visible content rather than crashing the whole completion. + if (!is_partial) { + common_chat_msg msg; + msg.role = "assistant"; + msg.content = input; return msg; } throw std::runtime_error(std::string("Failed to parse input at pos ") + std::to_string(result.end) + ": " + diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 30ea2c07213c..83c5762e5ecf 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -4177,6 +4177,24 @@ static void test_template_output_peg_parsers(bool detailed_debug) { )) .run(); + // Bare-bracket content must not abort the request. LFM2.5 has no tool-call wrapper + // token, so the parser optimistically treats the first '[' as a tool-call start. + // Ordinary assistant content that merely contains '[' (a list/index/type hint) used + // to fail the final parse and surface as an HTTP 500; it must degrade to content. + tst.test("Here is a Python list: [1, 2, 3] and that is all.") + .tools({ special_function_tool }) + .expect_content("Here is a Python list: [1, 2, 3] and that is all.") + .expect_reconstruction(false) + .run(); + + // The reported production shape: a code-mode reply that ends up as content + // containing a bracketed data structure. 
+ tst.test("trades = [\n {\"timestamp\": \"09:30:00\", \"price\": 150.10, \"size\": 100}\n]") + .tools({ special_function_tool }) + .expect_content("trades = [\n {\"timestamp\": \"09:30:00\", \"price\": 150.10, \"size\": 100}\n]") + .expect_reconstruction(false) + .run(); + // Partial tool call (streaming) tst.test("[special_function(arg1=") .tools({ special_function_tool }) From 0953a1bef906048fc8b80b4370ce247b1312f261 Mon Sep 17 00:00:00 2001 From: Dev-iL <6509619+Dev-iL@users.noreply.github.com> Date: Wed, 3 Jun 2026 10:05:47 -0400 Subject: [PATCH 2/2] chat: only treat [tool_name( as tool-call start in LFM2.5 parser --- common/chat.cpp | 33 +++++++-------------------------- tests/test-chat.cpp | 15 ++++++++++----- 2 files changed, 17 insertions(+), 31 deletions(-) diff --git a/common/chat.cpp b/common/chat.cpp index e30febb67644..09dcf388eb95 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -1762,7 +1762,13 @@ static common_chat_params common_chat_params_init_lfm2_5(const common_chat_templ ) ); - auto content = p.content(p.until_one_of({"<|tool_call_start|>", "["})); + // A bare '[' in ordinary content (a list, an index, a type hint, etc.) is not a tool call; + // treating it as one would fail the parse and surface as an HTTP 500. + std::vector<std::string> content_stops = { "<|tool_call_start|>" }; + foreach_function(inputs.tools, [&](const json & tool) { + content_stops.push_back("[" + tool.at("function").at("name").get<std::string>() + "("); + }); + auto content = p.content(p.until_one_of(content_stops)); auto maybe_start = p.optional(p.literal("<|tool_call_start|>")); return generation_prompt + reasoning + content + maybe_start + tool_calls + end; }); @@ -2596,31 +2602,6 @@ common_chat_msg common_chat_peg_parse(const common_peg_arena & src_pars fprintf(stderr, "\nAST for partial parse (fail):\n%s\n", ctx.ast.dump().c_str()); fflush(stderr); } - // If the partial AST recovered nothing usable, degrade to plain content (see - // the non-partial branch below for the rationale). 
This is essential for - // streaming: a bare-bracket format optimistically enters its tool-call branch - // at the first '[', and once the branch fails the AST drops the leading - // content. Without this fallback the streamed content would retract to empty - // and then the final non-partial parse would re-emit the whole text, so the - // client sees the pre-'[' prefix duplicated. Returning the input keeps content - // monotonic across the stream. Genuine partial tool calls populate tool_calls - // (or content) and are left untouched. - if (msg.content.empty() && msg.reasoning_content.empty() && msg.tool_calls.empty()) { - msg.content = input; - } - return msg; - } - // Final (non-partial) parse failure: degrade to plain content instead of aborting - // the request. Native bare-bracket formats (e.g. LFM2.5) optimistically treat any - // '[' as the start of a tool call, so ordinary assistant content that merely - // contains '[' (a list, an index, a type hint) would otherwise throw here and - // surface to the caller as an HTTP 500. Returning the generated text as content - // keeps the response alive; a genuinely intended-but-malformed tool call likewise - // degrades to visible content rather than crashing the whole completion. - if (!is_partial) { - common_chat_msg msg; - msg.role = "assistant"; - msg.content = input; return msg; } throw std::runtime_error(std::string("Failed to parse input at pos ") + std::to_string(result.end) + ": " + diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 83c5762e5ecf..fbb3a6f202f7 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -4178,13 +4178,12 @@ static void test_template_output_peg_parsers(bool detailed_debug) { .run(); // Bare-bracket content must not abort the request. LFM2.5 has no tool-call wrapper - // token, so the parser optimistically treats the first '[' as a tool-call start. 
- // Ordinary assistant content that merely contains '[' (a list/index/type hint) used - // to fail the final parse and surface as an HTTP 500; it must degrade to content. + // token; content must only stop at an actual "[{tool_name}(" prefix (matching the + // grammar triggers). A '[' that doesn't start a defined tool call (a list/index/ + // type hint) used to fail the final parse and surface as an HTTP 500. tst.test("Here is a Python list: [1, 2, 3] and that is all.") .tools({ special_function_tool }) .expect_content("Here is a Python list: [1, 2, 3] and that is all.") - .expect_reconstruction(false) .run(); // The reported production shape: a code-mode reply that ends up as content @@ -4192,7 +4191,13 @@ static void test_template_output_peg_parsers(bool detailed_debug) { tst.test("trades = [\n {\"timestamp\": \"09:30:00\", \"price\": 150.10, \"size\": 100}\n]") .tools({ special_function_tool }) .expect_content("trades = [\n {\"timestamp\": \"09:30:00\", \"price\": 150.10, \"size\": 100}\n]") - .expect_reconstruction(false) + .run(); + + // Streaming: a bare '[' mid-content must not retract previously streamed content. + tst.test("Here is a Python list: [1, 2") + .tools({ special_function_tool }) + .is_partial(true) + .expect_content("Here is a Python list: [1, 2") .run(); // Partial tool call (streaming)