From 0083bb18d6ac197bb463662c79569a8172811554 Mon Sep 17 00:00:00 2001 From: James O'Leary Date: Mon, 16 Mar 2026 17:58:09 -0700 Subject: [PATCH] chat : fix AST fallback for non-partial parses, lenient TAG_WITH_TAGGED arg capture --- common/chat-auto-parser-generator.cpp | 18 ++- common/chat.cpp | 11 +- models/templates/Qwen3.5-0.8B.jinja | 155 ++++++++++++++++++ tests/test-chat.cpp | 219 ++++++++++++++++++++++++++ 4 files changed, 394 insertions(+), 9 deletions(-) create mode 100644 models/templates/Qwen3.5-0.8B.jinja diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp index f19819494c8..ef526da89fc 100644 --- a/common/chat-auto-parser-generator.cpp +++ b/common/chat-auto-parser-generator.cpp @@ -327,16 +327,22 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte } } + // Capture argument values with until(value_suffix) instead of + // schema-validated json(). Even with grammar constraints active + // during generation, small models can produce slightly malformed + // JSON (e.g. extra trailing braces). Strict json() validation + // causes PEG backtracking to wipe the entire AST (including + // reasoning + tool name that already parsed successfully). + // until(value_suffix) captures the raw value and lets the caller + // decide what to do with it. + auto value_parser = type == "string" + ? p.tool_arg_string_value(p.until(arguments.value_suffix)) + : p.tool_arg_json_value(p.until(arguments.value_suffix)); auto arg = p.tool_arg( p.tool_arg_open(arguments.name_prefix + p.tool_arg_name(p.literal(param_name)) + arguments.name_suffix) + arguments.value_prefix + - (type == "string" ? p.tool_arg_string_value(p.schema(p.until(arguments.value_suffix), - "tool-" + name + "-arg-" + param_name + "-schema", - param_schema, true)) : - p.tool_arg_json_value(p.schema( - p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema, format.uses_python_dicts)) + - p.space()) + + value_parser + p.tool_arg_close(p.literal(arguments.value_suffix))); auto named_arg = p.rule("tool-" + name + "-arg-" + param_name, arg); diff --git a/common/chat.cpp b/common/chat.cpp index fb4751e5319..b1dc430b4e2 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -1747,9 +1747,14 @@ common_chat_msg common_chat_peg_parse(const common_peg_arena & src_pars auto result = parser.parse(ctx); if (result.fail()) { - // During partial parsing, return partial results if any AST nodes were captured - // This allows streaming to work correctly for formats like FUNC_MARKDOWN_CODE_BLOCK - if (is_partial && result.end > 0) { + // Return partial results if any AST nodes were captured + // This allows streaming to work correctly. + // Generally, callers will call with is_partial=false at the end of generation. + // If, for example, JSON ends up with an extra closing character, the full parse will fail. + // From the callers perspective, a tool call that was fully seen until the eog disappears. + // Additionally, callers do not expect this method to throw, and it is a core part of their + // inference loop. The exception should only be thrown in a truly exceptional situation. + if (result.end > 0) { // Try to extract any partial results from what was successfully parsed common_chat_msg msg; msg.role = "assistant"; diff --git a/models/templates/Qwen3.5-0.8B.jinja b/models/templates/Qwen3.5-0.8B.jinja new file mode 100644 index 00000000000..509ec9fc27d --- /dev/null +++ b/models/templates/Qwen3.5-0.8B.jinja @@ -0,0 +1,155 @@ +{%- set image_count = namespace(value=0) %} +{%- set video_count = namespace(value=0) %} +{%- macro render_content(content, do_vision_count, is_system_content=false) %} + {%- if content is string %} + {{- content }} + {%- elif content is iterable and content is not mapping %} + {%- for item in content %} + {%- if 'image' in item or 'image_url' in item or item.type == 'image' %} + {%- if is_system_content %} + {{- raise_exception('System message cannot contain images.') }} + {%- endif %} + {%- if do_vision_count %} + {%- set image_count.value = image_count.value + 1 %} + {%- endif %} + {%- if add_vision_id %} + {{- 'Picture ' ~ image_count.value ~ ': ' }} + {%- endif %} + {{- '<|vision_start|><|image_pad|><|vision_end|>' }} + {%- elif 'video' in item or item.type == 'video' %} + {%- if is_system_content %} + {{- raise_exception('System message cannot contain videos.') }} + {%- endif %} + {%- if do_vision_count %} + {%- set video_count.value = video_count.value + 1 %} + {%- endif %} + {%- if add_vision_id %} + {{- 'Video ' ~ video_count.value ~ ': ' }} + {%- endif %} + {{- '<|vision_start|><|video_pad|><|vision_end|>' }} + {%- elif 'text' in item %} + {{- item.text }} + {%- else %} + {{- raise_exception('Unexpected item type in content.') }} + {%- endif %} + {%- endfor %} + {%- elif content is none or content is undefined %} + {{- '' }} + {%- else %} + {{- raise_exception('Unexpected content type.') }} + {%- endif %} +{%- endmacro %} +{%- if not messages %} + {{- raise_exception('No messages provided.') }} +{%- endif %} +{%- if tools and tools is iterable and tools is not mapping %} + {{- '<|im_start|>system\n' }} + {{- "# Tools\n\nYou have access to the following functions:\n\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n" }} + {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nThis is the value for the second parameter\nthat can span\nmultiple lines\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: an inner block must be nested within XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n' }} + {%- if messages[0].role == 'system' %} + {%- set content = render_content(messages[0].content, false, true)|trim %} + {%- if content %} + {{- '\n\n' + content }} + {%- endif %} + {%- endif %} + {{- '<|im_end|>\n' }} +{%- else %} + {%- if messages[0].role == 'system' %} + {%- set content = render_content(messages[0].content, false, true)|trim %} + {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" %} + {%- set content = render_content(message.content, false)|trim %} + {%- if not(content.startswith('') and content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if ns.multi_step_tool %} + {{- raise_exception('No user query found in messages.') }} +{%- endif %} +{%- for message in messages %} + {%- set content = render_content(message.content, true)|trim %} + {%- if message.role == "system" %} + {%- if not loop.first %} + {{- raise_exception('System message must be at the beginning.') }} + {%- endif %} + {%- elif message.role == "user" %} + {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is string %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in content %} + {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- set content = content.split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- set reasoning_content = reasoning_content|trim %} + {%- if loop.index0 > ns.last_query_index %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content + '\n\n\n' + content }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {%- if loop.first %} + {%- if content|trim %} + {{- '\n\n\n\n' }} + {%- else %} + {{- '\n\n' }} + {%- endif %} + {%- else %} + {{- '\n\n\n' }} + {%- endif %} + {%- if tool_call.arguments is mapping %} + {%- for args_name in tool_call.arguments %} + {%- set args_value = tool_call.arguments[args_name] %} + {{- '\n' }} + {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %} + {{- args_value }} + {{- '\n\n' }} + {%- endfor %} + {%- endif %} + {{- '\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.previtem and loop.previtem.role != "tool" %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- content }} + {{- '\n' }} + {%- if not loop.last and loop.nextitem.role != "tool" %} + {{- '<|im_end|>\n' }} + {%- elif loop.last %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- else %} + {{- raise_exception('Unexpected message role.') }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is true %} + {{- '\n' }} + {%- else %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 3a6297e1480..52f6bcc0769 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -1792,6 +1792,225 @@ static void test_template_output_peg_parsers(bool detailed_debug) { }) .run(); } + // ========================================================================= + // Qwen3.5-0.8B: template + basic tool calling + malformed JSON regression + // ========================================================================= + { + auto tst = peg_tester("models/templates/Qwen3.5-0.8B.jinja", /*detailed_debug=*/false); + + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); + + tst.test( + "\n" + "\n" + "\n" + "1\n" + "\n" + "\n" + "") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + + tst.test( + "The user wants flashcards about California.\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "1\n" + "\n" + "\n" + "") + .tools({ special_function_tool }) + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .expect_reasoning("The user wants flashcards about California.\n") + .expect_tool_calls({ + { "special_function", R"({"arg1": 1})", {} }, + }) + .run(); + } + + // ========================================================================= + // REGRESSION: Qwen3.5 TAG_WITH_TAGGED with malformed JSON in args. + // + // Real model output: Qwen3.5-0.8B produces an extra trailing } in the + // parameter value, e.g. [{"query":"CA","recall":"LA"}]} instead of + // [{"query":"CA","recall":"LA"}]. + // + // Without patches: + // chat-auto-parser-generator.cpp: schema(json()) rejects malformed JSON, + // tool_call rule fails, PEG backtracks entire sequence, wipes AST + // chat.cpp: final parse (is_partial=false) throws std::runtime_error + // instead of using AST fallback + // => complete data loss, no finish_reason, client hangs + // + // With patches: + // chat-auto-parser-generator.cpp: until(value_suffix) captures raw value + // chat.cpp: AST fallback fires for all parses, not just partial + // => reasoning preserved, tool calls extracted + // ========================================================================= + { + auto tmpls = common_chat_templates_ptr( + common_chat_templates_init(nullptr, read_file("models/templates/Qwen3.5-0.8B.jinja"))); + + static common_chat_tool flashcards_tool{ + "flashcards", "Flashcards for studying", + R"({"type":"object","properties":{"flashcards":{"type":"array","items":{"type":"object","properties":{"query":{"type":"string"},"recall":{"type":"string"}},"required":["recall","query"]}}},"required":["flashcards"]})", + }; + + common_chat_templates_inputs inputs; + inputs.tools = { flashcards_tool }; + inputs.reasoning_format = COMMON_REASONING_FORMAT_AUTO; + inputs.enable_thinking = true; + inputs.add_generation_prompt = true; + inputs.use_jinja = true; + inputs.messages = {{ "user", "make a flashcard" }}; + + auto params = common_chat_templates_apply(tmpls.get(), inputs); + common_peg_arena arena; + arena.load(params.parser); + + common_chat_parser_params pp; + pp.format = params.format; + + // Regression tests: real Qwen3.5-0.8B output with malformed JSON + // Test 1: single tool call, malformed JSON (extra }) + { + auto msg = common_chat_peg_parse(arena, + "I will make flashcards.\n" + "\n\n" + "\n" + "\n" + "\n" + "[{\"query\": \"California\", \"recall\": \"Los Angeles\"}]}\n" + "\n" + "\n" + "", + /* is_partial */ false, pp); + + assert(!msg.reasoning_content.empty() && "[malformed-1] reasoning must be extracted"); + assert(msg.content.empty() && "[malformed-1] content must be empty"); + assert(msg.tool_calls.size() == 1 && "[malformed-1] one tool call must be found"); + assert(msg.tool_calls[0].name == "flashcards" && "[malformed-1] tool name"); + assert(!msg.tool_calls[0].arguments.empty() && "[malformed-1] arguments must not be empty"); + } + + // Test 2: two tool calls, both malformed — must find both + { + auto msg = common_chat_peg_parse(arena, + "Two sets.\n" + "\n\n" + "\n" + "\n" + "\n" + "[{\"query\": \"CA\", \"recall\": \"LA\"}]}\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "[{\"query\": \"NY\", \"recall\": \"NYC\"}]}\n" + "\n" + "\n" + "", + /* is_partial */ false, pp); + + assert(!msg.reasoning_content.empty() && "[malformed-2] reasoning must be extracted"); + assert(msg.tool_calls.size() == 2 && "[malformed-2] two tool calls must be found"); + assert(msg.tool_calls[0].name == "flashcards" && "[malformed-2] tool 0 name"); + assert(msg.tool_calls[1].name == "flashcards" && "[malformed-2] tool 1 name"); + } + + // Test 3: truncated tool call (model hit max_tokens mid-call) + // Without chat.cpp fix, this throws std::runtime_error on is_partial=false. + { + auto msg = common_chat_peg_parse(arena, + "I will make flashcards.\n" + "\n\n" + "\n" + "\n" + "\n" + "[{\"query\": \"Califor", // truncated mid-value + /* is_partial */ false, pp); + + assert(!msg.reasoning_content.empty() && "[truncated] reasoning must survive"); + } + + // Test 4: well-formed JSON — no regression + { + auto msg = common_chat_peg_parse(arena, + "Making cards.\n" + "\n\n" + "\n" + "\n" + "\n" + "[{\"query\": \"CA\", \"recall\": \"LA\"}]\n" + "\n" + "\n" + "", + /* is_partial */ false, pp); + + assert(!msg.reasoning_content.empty() && "[wellformed] reasoning must be extracted"); + assert(msg.tool_calls.size() == 1 && "[wellformed] one tool call"); + assert(msg.tool_calls[0].name == "flashcards" && "[wellformed] tool name"); + } + + // Test from #20260: Qwen3.5-35B text between and . + // Real model output: transition sentence after thinking, before tool call. + // "Failed to parse input at pos N" in agentic loops. + { + auto msg = common_chat_peg_parse(arena, + "Let me check some more information.\n" + "\n" + "Let me review more key files.\n" + "\n" + "\n" + "\n" + "[{\"query\": \"CA\", \"recall\": \"LA\"}]\n" + "\n" + "\n" + "", + /* is_partial */ false, pp); + + assert(!msg.reasoning_content.empty() && "[#20260] reasoning must survive"); + assert(msg.tool_calls.size() == 1 && "[#20260] tool call must be found"); + assert(msg.tool_calls[0].name == "flashcards" && "[#20260] tool name"); + } + } + + // Test from #20344: GPT-OSS structured JSON output in final channel. + // Real error: "Failed to parse input at pos 416: <|start|>assistant<|channel|>final<|message|>{...}" + { + auto tmpls_gptoss = common_chat_templates_ptr( + common_chat_templates_init(nullptr, read_file("models/templates/openai-gpt-oss-120b.jinja"))); + + common_chat_templates_inputs inputs_gptoss; + inputs_gptoss.json_schema = R"({"type":"object","properties":{"name":{"type":"string"},"age":{"type":"integer"}},"required":["name","age"]})"; + inputs_gptoss.add_generation_prompt = true; + inputs_gptoss.use_jinja = true; + inputs_gptoss.messages = {{ "user", "give me a person" }}; + + auto params_gptoss = common_chat_templates_apply(tmpls_gptoss.get(), inputs_gptoss); + common_peg_arena arena_gptoss; + arena_gptoss.load(params_gptoss.parser); + + common_chat_parser_params pp_gptoss; + pp_gptoss.format = params_gptoss.format; + + auto msg = common_chat_peg_parse(arena_gptoss, + "<|channel|>final<|message|>{\n" + " \"name\": \"John Doe\",\n" + " \"age\": 30\n" + "}", + /* is_partial */ false, pp_gptoss); + + assert(!msg.content.empty() && "[#20344] content must be extracted"); + } + { auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug); tst.test(