From 0083bb18d6ac197bb463662c79569a8172811554 Mon Sep 17 00:00:00 2001
From: James O'Leary <jpohhhh@gmail.com>
Date: Mon, 16 Mar 2026 17:58:09 -0700
Subject: [PATCH] chat : fix AST fallback for non-partial parses, lenient
 TAG_WITH_TAGGED arg capture

---
 common/chat-auto-parser-generator.cpp |  18 ++-
 common/chat.cpp                       |  11 +-
 models/templates/Qwen3.5-0.8B.jinja   | 155 ++++++++++++++++++
 tests/test-chat.cpp                   | 219 ++++++++++++++++++++++++++
 4 files changed, 394 insertions(+), 9 deletions(-)
 create mode 100644 models/templates/Qwen3.5-0.8B.jinja

diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp
index f19819494c8..ef526da89fc 100644
--- a/common/chat-auto-parser-generator.cpp
+++ b/common/chat-auto-parser-generator.cpp
@@ -327,16 +327,22 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte
                 }
             }
 
+            // Capture argument values with until(value_suffix) instead of
+            // schema-validated json(). Even with grammar constraints active
+            // during generation, small models can produce slightly malformed
+            // JSON (e.g. extra trailing braces). Strict json() validation
+            // causes PEG backtracking to wipe the entire AST (including
+            // reasoning + tool name that already parsed successfully).
+            // until(value_suffix) captures the raw value and lets the caller
+            // decide what to do with it.
+            auto value_parser = type == "string"
+                ? p.tool_arg_string_value(p.until(arguments.value_suffix))
+                : p.tool_arg_json_value(p.until(arguments.value_suffix));
             auto arg = p.tool_arg(
                 p.tool_arg_open(arguments.name_prefix + p.tool_arg_name(p.literal(param_name)) +
                                 arguments.name_suffix) +
                 arguments.value_prefix +
-                (type == "string" ? p.tool_arg_string_value(p.schema(p.until(arguments.value_suffix),
-                                                                     "tool-" + name + "-arg-" + param_name + "-schema",
-                                                                     param_schema, true)) :
-                                    p.tool_arg_json_value(p.schema(
-                                        p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema, format.uses_python_dicts)) +
-                                        p.space()) +
+                value_parser +
                 p.tool_arg_close(p.literal(arguments.value_suffix)));
 
             auto named_arg = p.rule("tool-" + name + "-arg-" + param_name, arg);
diff --git a/common/chat.cpp b/common/chat.cpp
index fb4751e5319..b1dc430b4e2 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1747,9 +1747,14 @@ common_chat_msg common_chat_peg_parse(const common_peg_arena &          src_pars
     auto result = parser.parse(ctx);
 
     if (result.fail()) {
-        // During partial parsing, return partial results if any AST nodes were captured
-        // This allows streaming to work correctly for formats like FUNC_MARKDOWN_CODE_BLOCK
-        if (is_partial && result.end > 0) {
+        // Return partial results if any AST nodes were captured
+        // This allows streaming to work correctly.
+        // Generally, callers will call with is_partial=false at the end of generation. 
+        // If, for example, JSON ends up with an extra closing character, the full parse will fail.
+        // From the callers perspective, a tool call that was fully seen until the eog disappears.
+        // Additionally, callers do not expect this method to throw, and it is a core part of their
+        // inference loop. The exception should only be thrown in a truly exceptional situation.
+        if (result.end > 0) {
             // Try to extract any partial results from what was successfully parsed
             common_chat_msg msg;
             msg.role = "assistant";
diff --git a/models/templates/Qwen3.5-0.8B.jinja b/models/templates/Qwen3.5-0.8B.jinja
new file mode 100644
index 00000000000..509ec9fc27d
--- /dev/null
+++ b/models/templates/Qwen3.5-0.8B.jinja
@@ -0,0 +1,155 @@
+{%- set image_count = namespace(value=0) %}
+{%- set video_count = namespace(value=0) %}
+{%- macro render_content(content, do_vision_count, is_system_content=false) %}
+    {%- if content is string %}
+        {{- content }}
+    {%- elif content is iterable and content is not mapping %}
+        {%- for item in content %}
+            {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
+                {%- if is_system_content %}
+                    {{- raise_exception('System message cannot contain images.') }}
+                {%- endif %}
+                {%- if do_vision_count %}
+                    {%- set image_count.value = image_count.value + 1 %}
+                {%- endif %}
+                {%- if add_vision_id %}
+                    {{- 'Picture ' ~ image_count.value ~ ': ' }}
+                {%- endif %}
+                {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
+            {%- elif 'video' in item or item.type == 'video' %}
+                {%- if is_system_content %}
+                    {{- raise_exception('System message cannot contain videos.') }}
+                {%- endif %}
+                {%- if do_vision_count %}
+                    {%- set video_count.value = video_count.value + 1 %}
+                {%- endif %}
+                {%- if add_vision_id %}
+                    {{- 'Video ' ~ video_count.value ~ ': ' }}
+                {%- endif %}
+                {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
+            {%- elif 'text' in item %}
+                {{- item.text }}
+            {%- else %}
+                {{- raise_exception('Unexpected item type in content.') }}
+            {%- endif %}
+        {%- endfor %}
+    {%- elif content is none or content is undefined %}
+        {{- '' }}
+    {%- else %}
+        {{- raise_exception('Unexpected content type.') }}
+    {%- endif %}
+{%- endmacro %}
+{%- if not messages %}
+    {{- raise_exception('No messages provided.') }}
+{%- endif %}
+{%- if tools and tools is iterable and tools is not mapping %}
+    {{- '<|im_start|>system\n' }}
+    {{- "# Tools\n\nYou have access to the following functions:\n\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "\n</tools>" }}
+    {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
+    {%- if messages[0].role == 'system' %}
+        {%- set content = render_content(messages[0].content, false, true)|trim %}
+        {%- if content %}
+            {{- '\n\n' + content }}
+        {%- endif %}
+    {%- endif %}
+    {{- '<|im_end|>\n' }}
+{%- else %}
+    {%- if messages[0].role == 'system' %}
+        {%- set content = render_content(messages[0].content, false, true)|trim %}
+        {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+    {%- set index = (messages|length - 1) - loop.index0 %}
+    {%- if ns.multi_step_tool and message.role == "user" %}
+        {%- set content = render_content(message.content, false)|trim %}
+        {%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
+            {%- set ns.multi_step_tool = false %}
+            {%- set ns.last_query_index = index %}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if ns.multi_step_tool %}
+    {{- raise_exception('No user query found in messages.') }}
+{%- endif %}
+{%- for message in messages %}
+    {%- set content = render_content(message.content, true)|trim %}
+    {%- if message.role == "system" %}
+        {%- if not loop.first %}
+            {{- raise_exception('System message must be at the beginning.') }}
+        {%- endif %}
+    {%- elif message.role == "user" %}
+        {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {%- set reasoning_content = '' %}
+        {%- if message.reasoning_content is string %}
+            {%- set reasoning_content = message.reasoning_content %}
+        {%- else %}
+            {%- if '</think>' in content %}
+                {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+            {%- endif %}
+        {%- endif %}
+        {%- set reasoning_content = reasoning_content|trim %}
+        {%- if loop.index0 > ns.last_query_index %}
+            {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n\n' + content }}
+        {%- else %}
+            {{- '<|im_start|>' + message.role + '\n' + content }}
+        {%- endif %}
+        {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
+            {%- for tool_call in message.tool_calls %}
+                {%- if tool_call.function is defined %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {%- if loop.first %}
+                    {%- if content|trim %}
+                        {{- '\n\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
+                    {%- else %}
+                        {{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
+                    {%- endif %}
+                {%- else %}
+                    {{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
+                {%- endif %}
+                {%- if tool_call.arguments is mapping %}
+                    {%- for args_name in tool_call.arguments %}
+                        {%- set args_value = tool_call.arguments[args_name] %}
+                        {{- '<parameter=' + args_name + '>\n' }}
+                        {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
+                        {{- args_value }}
+                        {{- '\n</parameter>\n' }}
+                    {%- endfor %}
+                {%- endif %}
+                {{- '</function>\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {%- if loop.previtem and loop.previtem.role != "tool" %}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {{- content }}
+        {{- '\n</tool_response>' }}
+        {%- if not loop.last and loop.nextitem.role != "tool" %}
+            {{- '<|im_end|>\n' }}
+        {%- elif loop.last %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- else %}
+        {{- raise_exception('Unexpected message role.') }}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+    {%- if enable_thinking is defined and enable_thinking is true %}
+        {{- '<think>\n' }}
+    {%- else %}
+        {{- '<think>\n\n</think>\n\n' }}
+    {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
index 3a6297e1480..52f6bcc0769 100644
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -1792,6 +1792,225 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
             })
             .run();
     }
+    // =========================================================================
+    // Qwen3.5-0.8B: template + basic tool calling + malformed JSON regression
+    // =========================================================================
+    {
+        auto tst = peg_tester("models/templates/Qwen3.5-0.8B.jinja", /*detailed_debug=*/false);
+
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+
+        tst.test(
+               "<tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>\n"
+               "1\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        tst.test(
+               "The user wants flashcards about California.\n"
+               "</think>\n"
+               "\n"
+               "<tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>\n"
+               "1\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({ special_function_tool })
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect_reasoning("The user wants flashcards about California.\n")
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", {} },
+            })
+            .run();
+    }
+
+    // =========================================================================
+    // REGRESSION: Qwen3.5 TAG_WITH_TAGGED with malformed JSON in args.
+    //
+    // Real model output: Qwen3.5-0.8B produces an extra trailing } in the
+    // parameter value, e.g. [{"query":"CA","recall":"LA"}]} instead of
+    // [{"query":"CA","recall":"LA"}].
+    //
+    // Without patches:
+    //   chat-auto-parser-generator.cpp: schema(json()) rejects malformed JSON,
+    //     tool_call rule fails, PEG backtracks entire sequence, wipes AST
+    //   chat.cpp: final parse (is_partial=false) throws std::runtime_error
+    //     instead of using AST fallback
+    //   => complete data loss, no finish_reason, client hangs
+    //
+    // With patches:
+    //   chat-auto-parser-generator.cpp: until(value_suffix) captures raw value
+    //   chat.cpp: AST fallback fires for all parses, not just partial
+    //   => reasoning preserved, tool calls extracted
+    // =========================================================================
+    {
+        auto tmpls = common_chat_templates_ptr(
+            common_chat_templates_init(nullptr, read_file("models/templates/Qwen3.5-0.8B.jinja")));
+
+        static common_chat_tool flashcards_tool{
+            "flashcards", "Flashcards for studying",
+            R"({"type":"object","properties":{"flashcards":{"type":"array","items":{"type":"object","properties":{"query":{"type":"string"},"recall":{"type":"string"}},"required":["recall","query"]}}},"required":["flashcards"]})",
+        };
+
+        common_chat_templates_inputs inputs;
+        inputs.tools              = { flashcards_tool };
+        inputs.reasoning_format   = COMMON_REASONING_FORMAT_AUTO;
+        inputs.enable_thinking    = true;
+        inputs.add_generation_prompt = true;
+        inputs.use_jinja          = true;
+        inputs.messages           = {{ "user", "make a flashcard" }};
+
+        auto params = common_chat_templates_apply(tmpls.get(), inputs);
+        common_peg_arena arena;
+        arena.load(params.parser);
+
+        common_chat_parser_params pp;
+        pp.format = params.format;
+
+        // Regression tests: real Qwen3.5-0.8B output with malformed JSON
+        // Test 1: single tool call, malformed JSON (extra })
+        {
+            auto msg = common_chat_peg_parse(arena,
+                "I will make flashcards.\n"
+                "</think>\n\n"
+                "<tool_call>\n"
+                "<function=flashcards>\n"
+                "<parameter=flashcards>\n"
+                "[{\"query\": \"California\", \"recall\": \"Los Angeles\"}]}\n"
+                "</parameter>\n"
+                "</function>\n"
+                "</tool_call>",
+                /* is_partial */ false, pp);
+
+            assert(!msg.reasoning_content.empty()  && "[malformed-1] reasoning must be extracted");
+            assert(msg.content.empty()             && "[malformed-1] content must be empty");
+            assert(msg.tool_calls.size() == 1      && "[malformed-1] one tool call must be found");
+            assert(msg.tool_calls[0].name == "flashcards" && "[malformed-1] tool name");
+            assert(!msg.tool_calls[0].arguments.empty()   && "[malformed-1] arguments must not be empty");
+        }
+
+        // Test 2: two tool calls, both malformed — must find both
+        {
+            auto msg = common_chat_peg_parse(arena,
+                "Two sets.\n"
+                "</think>\n\n"
+                "<tool_call>\n"
+                "<function=flashcards>\n"
+                "<parameter=flashcards>\n"
+                "[{\"query\": \"CA\", \"recall\": \"LA\"}]}\n"
+                "</parameter>\n"
+                "</function>\n"
+                "</tool_call>\n"
+                "<tool_call>\n"
+                "<function=flashcards>\n"
+                "<parameter=flashcards>\n"
+                "[{\"query\": \"NY\", \"recall\": \"NYC\"}]}\n"
+                "</parameter>\n"
+                "</function>\n"
+                "</tool_call>",
+                /* is_partial */ false, pp);
+
+            assert(!msg.reasoning_content.empty()  && "[malformed-2] reasoning must be extracted");
+            assert(msg.tool_calls.size() == 2      && "[malformed-2] two tool calls must be found");
+            assert(msg.tool_calls[0].name == "flashcards" && "[malformed-2] tool 0 name");
+            assert(msg.tool_calls[1].name == "flashcards" && "[malformed-2] tool 1 name");
+        }
+
+        // Test 3: truncated tool call (model hit max_tokens mid-call)
+        // Without chat.cpp fix, this throws std::runtime_error on is_partial=false.
+        {
+            auto msg = common_chat_peg_parse(arena,
+                "I will make flashcards.\n"
+                "</think>\n\n"
+                "<tool_call>\n"
+                "<function=flashcards>\n"
+                "<parameter=flashcards>\n"
+                "[{\"query\": \"Califor",  // truncated mid-value
+                /* is_partial */ false, pp);
+
+            assert(!msg.reasoning_content.empty()  && "[truncated] reasoning must survive");
+        }
+
+        // Test 4: well-formed JSON — no regression
+        {
+            auto msg = common_chat_peg_parse(arena,
+                "Making cards.\n"
+                "</think>\n\n"
+                "<tool_call>\n"
+                "<function=flashcards>\n"
+                "<parameter=flashcards>\n"
+                "[{\"query\": \"CA\", \"recall\": \"LA\"}]\n"
+                "</parameter>\n"
+                "</function>\n"
+                "</tool_call>",
+                /* is_partial */ false, pp);
+
+            assert(!msg.reasoning_content.empty()  && "[wellformed] reasoning must be extracted");
+            assert(msg.tool_calls.size() == 1      && "[wellformed] one tool call");
+            assert(msg.tool_calls[0].name == "flashcards" && "[wellformed] tool name");
+        }
+
+        // Test from #20260: Qwen3.5-35B text between </think> and <tool_call>.
+        // Real model output: transition sentence after thinking, before tool call.
+        // "Failed to parse input at pos N" in agentic loops.
+        {
+            auto msg = common_chat_peg_parse(arena,
+                "Let me check some more information.\n"
+                "</think>\n"
+                "Let me review more key files.\n"
+                "<tool_call>\n"
+                "<function=flashcards>\n"
+                "<parameter=flashcards>\n"
+                "[{\"query\": \"CA\", \"recall\": \"LA\"}]\n"
+                "</parameter>\n"
+                "</function>\n"
+                "</tool_call>",
+                /* is_partial */ false, pp);
+
+            assert(!msg.reasoning_content.empty()  && "[#20260] reasoning must survive");
+            assert(msg.tool_calls.size() == 1      && "[#20260] tool call must be found");
+            assert(msg.tool_calls[0].name == "flashcards" && "[#20260] tool name");
+        }
+    }
+
+    // Test from #20344: GPT-OSS structured JSON output in final channel.
+    // Real error: "Failed to parse input at pos 416: <|start|>assistant<|channel|>final<|message|>{...}"
+    {
+        auto tmpls_gptoss = common_chat_templates_ptr(
+            common_chat_templates_init(nullptr, read_file("models/templates/openai-gpt-oss-120b.jinja")));
+
+        common_chat_templates_inputs inputs_gptoss;
+        inputs_gptoss.json_schema    = R"({"type":"object","properties":{"name":{"type":"string"},"age":{"type":"integer"}},"required":["name","age"]})";
+        inputs_gptoss.add_generation_prompt = true;
+        inputs_gptoss.use_jinja      = true;
+        inputs_gptoss.messages        = {{ "user", "give me a person" }};
+
+        auto params_gptoss = common_chat_templates_apply(tmpls_gptoss.get(), inputs_gptoss);
+        common_peg_arena arena_gptoss;
+        arena_gptoss.load(params_gptoss.parser);
+
+        common_chat_parser_params pp_gptoss;
+        pp_gptoss.format = params_gptoss.format;
+
+        auto msg = common_chat_peg_parse(arena_gptoss,
+            "<|channel|>final<|message|>{\n"
+            "  \"name\": \"John Doe\",\n"
+            "  \"age\": 30\n"
+            "}",
+            /* is_partial */ false, pp_gptoss);
+
+        assert(!msg.content.empty() && "[#20344] content must be extracted");
+    }
+
     {
         auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
         tst.test(