diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp
index f19819494c8..ef526da89fc 100644
--- a/common/chat-auto-parser-generator.cpp
+++ b/common/chat-auto-parser-generator.cpp
@@ -327,16 +327,22 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte
}
}
+ // Capture argument values with until(value_suffix) instead of
+ // schema-validated json(). Even with grammar constraints active
+ // during generation, small models can produce slightly malformed
+ // JSON (e.g. extra trailing braces). Strict json() validation
+ // causes PEG backtracking to wipe the entire AST (including
+ // reasoning + tool name that already parsed successfully).
+ // until(value_suffix) captures the raw value and lets the caller
+ // decide what to do with it.
+ auto value_parser = type == "string"
+ ? p.tool_arg_string_value(p.until(arguments.value_suffix))
+ : p.tool_arg_json_value(p.until(arguments.value_suffix));
auto arg = p.tool_arg(
p.tool_arg_open(arguments.name_prefix + p.tool_arg_name(p.literal(param_name)) +
arguments.name_suffix) +
arguments.value_prefix +
- (type == "string" ? p.tool_arg_string_value(p.schema(p.until(arguments.value_suffix),
- "tool-" + name + "-arg-" + param_name + "-schema",
- param_schema, true)) :
- p.tool_arg_json_value(p.schema(
- p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema, format.uses_python_dicts)) +
- p.space()) +
+ value_parser +
p.tool_arg_close(p.literal(arguments.value_suffix)));
auto named_arg = p.rule("tool-" + name + "-arg-" + param_name, arg);
diff --git a/common/chat.cpp b/common/chat.cpp
index fb4751e5319..b1dc430b4e2 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1747,9 +1747,14 @@ common_chat_msg common_chat_peg_parse(const common_peg_arena & src_pars
auto result = parser.parse(ctx);
if (result.fail()) {
- // During partial parsing, return partial results if any AST nodes were captured
- // This allows streaming to work correctly for formats like FUNC_MARKDOWN_CODE_BLOCK
- if (is_partial && result.end > 0) {
+ // Return partial results if any AST nodes were captured
+ // This allows streaming to work correctly.
+ // Generally, callers will call with is_partial=false at the end of generation.
+ // If, for example, JSON ends up with an extra closing character, the full parse will fail.
+ // From the callers perspective, a tool call that was fully seen until the eog disappears.
+ // Additionally, callers do not expect this method to throw, and it is a core part of their
+ // inference loop. The exception should only be thrown in a truly exceptional situation.
+ if (result.end > 0) {
// Try to extract any partial results from what was successfully parsed
common_chat_msg msg;
msg.role = "assistant";
diff --git a/models/templates/Qwen3.5-0.8B.jinja b/models/templates/Qwen3.5-0.8B.jinja
new file mode 100644
index 00000000000..509ec9fc27d
--- /dev/null
+++ b/models/templates/Qwen3.5-0.8B.jinja
@@ -0,0 +1,155 @@
+{%- set image_count = namespace(value=0) %}
+{%- set video_count = namespace(value=0) %}
+{%- macro render_content(content, do_vision_count, is_system_content=false) %}
+ {%- if content is string %}
+ {{- content }}
+ {%- elif content is iterable and content is not mapping %}
+ {%- for item in content %}
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain images.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set image_count.value = image_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Picture ' ~ image_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
+ {%- elif 'video' in item or item.type == 'video' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain videos.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set video_count.value = video_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Video ' ~ video_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
+ {%- elif 'text' in item %}
+ {{- item.text }}
+ {%- else %}
+ {{- raise_exception('Unexpected item type in content.') }}
+ {%- endif %}
+ {%- endfor %}
+ {%- elif content is none or content is undefined %}
+ {{- '' }}
+ {%- else %}
+ {{- raise_exception('Unexpected content type.') }}
+ {%- endif %}
+{%- endmacro %}
+{%- if not messages %}
+ {{- raise_exception('No messages provided.') }}
+{%- endif %}
+{%- if tools and tools is iterable and tools is not mapping %}
+ {{- '<|im_start|>system\n' }}
+ {{- "# Tools\n\nYou have access to the following functions:\n\n" }}
+ {%- for tool in tools %}
+ {{- "\n" }}
+ {{- tool | tojson }}
+ {%- endfor %}
+ {{- "\n" }}
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nThis is the value for the second parameter\nthat can span\nmultiple lines\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: an inner block must be nested within XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n' }}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {%- if content %}
+ {{- '\n\n' + content }}
+ {%- endif %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+{%- else %}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
+ {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+ {%- set index = (messages|length - 1) - loop.index0 %}
+ {%- if ns.multi_step_tool and message.role == "user" %}
+ {%- set content = render_content(message.content, false)|trim %}
+ {%- if not(content.startswith('') and content.endswith('')) %}
+ {%- set ns.multi_step_tool = false %}
+ {%- set ns.last_query_index = index %}
+ {%- endif %}
+ {%- endif %}
+{%- endfor %}
+{%- if ns.multi_step_tool %}
+ {{- raise_exception('No user query found in messages.') }}
+{%- endif %}
+{%- for message in messages %}
+ {%- set content = render_content(message.content, true)|trim %}
+ {%- if message.role == "system" %}
+ {%- if not loop.first %}
+ {{- raise_exception('System message must be at the beginning.') }}
+ {%- endif %}
+ {%- elif message.role == "user" %}
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "assistant" %}
+ {%- set reasoning_content = '' %}
+ {%- if message.reasoning_content is string %}
+ {%- set reasoning_content = message.reasoning_content %}
+ {%- else %}
+ {%- if '' in content %}
+ {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %}
+ {%- set content = content.split('')[-1].lstrip('\n') %}
+ {%- endif %}
+ {%- endif %}
+ {%- set reasoning_content = reasoning_content|trim %}
+ {%- if loop.index0 > ns.last_query_index %}
+ {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content + '\n\n\n' + content }}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if tool_call.function is defined %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {%- if loop.first %}
+ {%- if content|trim %}
+ {{- '\n\n\n\n' }}
+ {%- else %}
+ {{- '\n\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- '\n\n\n' }}
+ {%- endif %}
+ {%- if tool_call.arguments is mapping %}
+ {%- for args_name in tool_call.arguments %}
+ {%- set args_value = tool_call.arguments[args_name] %}
+ {{- '\n' }}
+ {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
+ {{- args_value }}
+ {{- '\n\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "tool" %}
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
+ {{- '<|im_start|>user' }}
+ {%- endif %}
+ {{- '\n\n' }}
+ {{- content }}
+ {{- '\n' }}
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
+ {{- '<|im_end|>\n' }}
+ {%- elif loop.last %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- raise_exception('Unexpected message role.') }}
+ {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- if enable_thinking is defined and enable_thinking is true %}
+ {{- '\n' }}
+ {%- else %}
+ {{- '\n\n\n\n' }}
+ {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
index 3a6297e1480..52f6bcc0769 100644
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -1792,6 +1792,225 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
})
.run();
}
+ // =========================================================================
+ // Qwen3.5-0.8B: template + basic tool calling + malformed JSON regression
+ // =========================================================================
+ {
+ auto tst = peg_tester("models/templates/Qwen3.5-0.8B.jinja", /*detailed_debug=*/false);
+
+ tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+
+ tst.test(
+ "\n"
+ "\n"
+ "\n"
+ "1\n"
+ "\n"
+ "\n"
+ "")
+ .tools({ special_function_tool })
+ .expect(message_assist_call)
+ .run();
+
+ tst.test(
+ "The user wants flashcards about California.\n"
+ "\n"
+ "\n"
+ "\n"
+ "\n"
+ "\n"
+ "1\n"
+ "\n"
+ "\n"
+ "")
+ .tools({ special_function_tool })
+ .enable_thinking(true)
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .expect_reasoning("The user wants flashcards about California.\n")
+ .expect_tool_calls({
+ { "special_function", R"({"arg1": 1})", {} },
+ })
+ .run();
+ }
+
+ // =========================================================================
+ // REGRESSION: Qwen3.5 TAG_WITH_TAGGED with malformed JSON in args.
+ //
+ // Real model output: Qwen3.5-0.8B produces an extra trailing } in the
+ // parameter value, e.g. [{"query":"CA","recall":"LA"}]} instead of
+ // [{"query":"CA","recall":"LA"}].
+ //
+ // Without patches:
+ // chat-auto-parser-generator.cpp: schema(json()) rejects malformed JSON,
+ // tool_call rule fails, PEG backtracks entire sequence, wipes AST
+ // chat.cpp: final parse (is_partial=false) throws std::runtime_error
+ // instead of using AST fallback
+ // => complete data loss, no finish_reason, client hangs
+ //
+ // With patches:
+ // chat-auto-parser-generator.cpp: until(value_suffix) captures raw value
+ // chat.cpp: AST fallback fires for all parses, not just partial
+ // => reasoning preserved, tool calls extracted
+ // =========================================================================
+ {
+ auto tmpls = common_chat_templates_ptr(
+ common_chat_templates_init(nullptr, read_file("models/templates/Qwen3.5-0.8B.jinja")));
+
+ static common_chat_tool flashcards_tool{
+ "flashcards", "Flashcards for studying",
+ R"({"type":"object","properties":{"flashcards":{"type":"array","items":{"type":"object","properties":{"query":{"type":"string"},"recall":{"type":"string"}},"required":["recall","query"]}}},"required":["flashcards"]})",
+ };
+
+ common_chat_templates_inputs inputs;
+ inputs.tools = { flashcards_tool };
+ inputs.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
+ inputs.enable_thinking = true;
+ inputs.add_generation_prompt = true;
+ inputs.use_jinja = true;
+ inputs.messages = {{ "user", "make a flashcard" }};
+
+ auto params = common_chat_templates_apply(tmpls.get(), inputs);
+ common_peg_arena arena;
+ arena.load(params.parser);
+
+ common_chat_parser_params pp;
+ pp.format = params.format;
+
+ // Regression tests: real Qwen3.5-0.8B output with malformed JSON
+ // Test 1: single tool call, malformed JSON (extra })
+ {
+ auto msg = common_chat_peg_parse(arena,
+ "I will make flashcards.\n"
+ "\n\n"
+ "\n"
+ "\n"
+ "\n"
+ "[{\"query\": \"California\", \"recall\": \"Los Angeles\"}]}\n"
+ "\n"
+ "\n"
+ "",
+ /* is_partial */ false, pp);
+
+ assert(!msg.reasoning_content.empty() && "[malformed-1] reasoning must be extracted");
+ assert(msg.content.empty() && "[malformed-1] content must be empty");
+ assert(msg.tool_calls.size() == 1 && "[malformed-1] one tool call must be found");
+ assert(msg.tool_calls[0].name == "flashcards" && "[malformed-1] tool name");
+ assert(!msg.tool_calls[0].arguments.empty() && "[malformed-1] arguments must not be empty");
+ }
+
+ // Test 2: two tool calls, both malformed — must find both
+ {
+ auto msg = common_chat_peg_parse(arena,
+ "Two sets.\n"
+ "\n\n"
+ "\n"
+ "\n"
+ "\n"
+ "[{\"query\": \"CA\", \"recall\": \"LA\"}]}\n"
+ "\n"
+ "\n"
+ "\n"
+ "\n"
+ "\n"
+ "\n"
+ "[{\"query\": \"NY\", \"recall\": \"NYC\"}]}\n"
+ "\n"
+ "\n"
+ "",
+ /* is_partial */ false, pp);
+
+ assert(!msg.reasoning_content.empty() && "[malformed-2] reasoning must be extracted");
+ assert(msg.tool_calls.size() == 2 && "[malformed-2] two tool calls must be found");
+ assert(msg.tool_calls[0].name == "flashcards" && "[malformed-2] tool 0 name");
+ assert(msg.tool_calls[1].name == "flashcards" && "[malformed-2] tool 1 name");
+ }
+
+ // Test 3: truncated tool call (model hit max_tokens mid-call)
+ // Without chat.cpp fix, this throws std::runtime_error on is_partial=false.
+ {
+ auto msg = common_chat_peg_parse(arena,
+ "I will make flashcards.\n"
+ "\n\n"
+ "\n"
+ "\n"
+ "\n"
+ "[{\"query\": \"Califor", // truncated mid-value
+ /* is_partial */ false, pp);
+
+ assert(!msg.reasoning_content.empty() && "[truncated] reasoning must survive");
+ }
+
+ // Test 4: well-formed JSON — no regression
+ {
+ auto msg = common_chat_peg_parse(arena,
+ "Making cards.\n"
+ "\n\n"
+ "\n"
+ "\n"
+ "\n"
+ "[{\"query\": \"CA\", \"recall\": \"LA\"}]\n"
+ "\n"
+ "\n"
+ "",
+ /* is_partial */ false, pp);
+
+ assert(!msg.reasoning_content.empty() && "[wellformed] reasoning must be extracted");
+ assert(msg.tool_calls.size() == 1 && "[wellformed] one tool call");
+ assert(msg.tool_calls[0].name == "flashcards" && "[wellformed] tool name");
+ }
+
+ // Test from #20260: Qwen3.5-35B text between and .
+ // Real model output: transition sentence after thinking, before tool call.
+ // "Failed to parse input at pos N" in agentic loops.
+ {
+ auto msg = common_chat_peg_parse(arena,
+ "Let me check some more information.\n"
+ "\n"
+ "Let me review more key files.\n"
+ "\n"
+ "\n"
+ "\n"
+ "[{\"query\": \"CA\", \"recall\": \"LA\"}]\n"
+ "\n"
+ "\n"
+ "",
+ /* is_partial */ false, pp);
+
+ assert(!msg.reasoning_content.empty() && "[#20260] reasoning must survive");
+ assert(msg.tool_calls.size() == 1 && "[#20260] tool call must be found");
+ assert(msg.tool_calls[0].name == "flashcards" && "[#20260] tool name");
+ }
+ }
+
+ // Test from #20344: GPT-OSS structured JSON output in final channel.
+ // Real error: "Failed to parse input at pos 416: <|start|>assistant<|channel|>final<|message|>{...}"
+ {
+ auto tmpls_gptoss = common_chat_templates_ptr(
+ common_chat_templates_init(nullptr, read_file("models/templates/openai-gpt-oss-120b.jinja")));
+
+ common_chat_templates_inputs inputs_gptoss;
+ inputs_gptoss.json_schema = R"({"type":"object","properties":{"name":{"type":"string"},"age":{"type":"integer"}},"required":["name","age"]})";
+ inputs_gptoss.add_generation_prompt = true;
+ inputs_gptoss.use_jinja = true;
+ inputs_gptoss.messages = {{ "user", "give me a person" }};
+
+ auto params_gptoss = common_chat_templates_apply(tmpls_gptoss.get(), inputs_gptoss);
+ common_peg_arena arena_gptoss;
+ arena_gptoss.load(params_gptoss.parser);
+
+ common_chat_parser_params pp_gptoss;
+ pp_gptoss.format = params_gptoss.format;
+
+ auto msg = common_chat_peg_parse(arena_gptoss,
+ "<|channel|>final<|message|>{\n"
+ " \"name\": \"John Doe\",\n"
+ " \"age\": 30\n"
+ "}",
+ /* is_partial */ false, pp_gptoss);
+
+ assert(!msg.content.empty() && "[#20344] content must be extracted");
+ }
+
{
auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
tst.test(