From 477a2171e2e8300a4e83e0bd92a1a5efb0993e75 Mon Sep 17 00:00:00 2001
From: Tarek Dakhran <tarek@liquid.ai>
Date: Sat, 6 Jun 2026 16:57:16 +0200
Subject: [PATCH 1/2] common/chat : fix LFM2 reasoning round-trip and stray
 <think> leak

---
 common/chat.cpp                      |  16 +-
 models/templates/LFM2.5-8B-A1B.jinja | 115 ++++++++
 tests/test-chat.cpp                  | 414 ++++++++++++---------------
 3 files changed, 313 insertions(+), 232 deletions(-)
 create mode 100644 models/templates/LFM2.5-8B-A1B.jinja
diff --git a/common/chat.cpp b/common/chat.cpp
index b8f248dab4e..512cbebc841 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1625,8 +1625,17 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
     const std::string THINK_END       = "</think>";
     const std::string GEN_PROMPT      = "<|im_start|>assistant\n";
 
-    data.prompt            = common_chat_template_direct_apply_impl(tmpl, inputs);
-    data.generation_prompt = common_chat_template_generation_prompt_impl(tmpl, inputs);
+    // Mirror "reasoning_content" into the "thinking" field the template expects
+    auto adjusted_messages = json::array();
+    for (auto msg : inputs.messages) {  // by value: each copy is patched, then moved into the array
+        if (msg.contains("reasoning_content") && msg.at("reasoning_content").is_string()) {
+            msg["thinking"] = msg.at("reasoning_content");
+        }
+        adjusted_messages.push_back(std::move(msg));  // msg is not used again; avoid a second deep copy
+    }
+
+    data.prompt            = common_chat_template_direct_apply_impl(tmpl, inputs, adjusted_messages);
+    data.generation_prompt = common_chat_template_generation_prompt_impl(tmpl, inputs, adjusted_messages);
     data.format            = COMMON_CHAT_FORMAT_PEG_NATIVE;
     data.supports_thinking = true;
     data.preserved_tokens  = { TOOL_CALL_START, TOOL_CALL_END, THINK_START, THINK_END };
@@ -1660,6 +1669,9 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
         auto reasoning = p.eps();
         if (extract_reasoning && inputs.enable_thinking) {
             reasoning = p.optional(THINK_START + p.reasoning(p.until(THINK_END)) + THINK_END);
+        } else if (extract_reasoning) {
+            // Thinking off, but model may still emit <think>, drop it
+            reasoning = p.optional(p.literal(THINK_START) + p.until(THINK_END) + p.literal(THINK_END));
         }
 
         if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
diff --git a/models/templates/LFM2.5-8B-A1B.jinja b/models/templates/LFM2.5-8B-A1B.jinja
new file mode 100644
index 00000000000..8bca4a545e9
--- /dev/null
+++ b/models/templates/LFM2.5-8B-A1B.jinja
@@ -0,0 +1,115 @@
+{{- bos_token -}}
+{%- set preserve_thinking = preserve_thinking | default(false) -%}
+
+{%- macro format_arg_value(arg_value) -%}
+    {%- if arg_value is string -%}
+        {{- "'" + arg_value + "'" -}}
+    {%- elif arg_value is mapping -%}
+        {{- arg_value | tojson -}}
+    {%- else -%}
+        {{- arg_value | string -}}
+    {%- endif -%}
+{%- endmacro -%}
+
+{%- macro parse_content(content) -%}
+    {%- if content is string -%}
+        {{- content -}}
+    {%- else -%}
+        {%- set _ns = namespace(result="") -%}
+        {%- for item in content -%}
+            {%- if item["type"] == "image" -%}
+                {%- set _ns.result = _ns.result + "<image>" -%}
+            {%- elif item["type"] == "text" -%}
+                {%- set _ns.result = _ns.result + item["text"] -%}
+            {%- else -%}
+                {%- set _ns.result = _ns.result + item | tojson -%}
+            {%- endif -%}
+        {%- endfor -%}
+        {{- _ns.result -}}
+    {%- endif -%}
+{%- endmacro -%}
+
+{%- macro render_tool_calls(tool_calls) -%}
+    {%- set tool_calls_ns = namespace(tool_calls=[]) -%}
+    {%- for tool_call in tool_calls -%}
+        {%- set func_name = tool_call["function"]["name"] -%}
+        {%- set func_args = tool_call["function"]["arguments"] -%}
+        {%- set args_ns = namespace(arg_strings=[]) -%}
+        {%- for arg_name, arg_value in func_args.items() -%}
+            {%- set args_ns.arg_strings = args_ns.arg_strings + [arg_name + "=" + format_arg_value(arg_value)] -%}
+        {%- endfor -%}
+        {%- set tool_calls_ns.tool_calls = tool_calls_ns.tool_calls + [func_name + "(" + (args_ns.arg_strings | join(", ")) + ")"] -%}
+    {%- endfor -%}
+    {{- "<|tool_call_start|>[" + (tool_calls_ns.tool_calls | join(", ")) + "]<|tool_call_end|>" -}}
+{%- endmacro -%}
+
+{%- set ns = namespace(system_prompt="", last_user_index=-1) -%}
+{%- if messages[0]["role"] == "system" -%}
+    {%- if messages[0].get("content") -%}
+        {%- set ns.system_prompt = parse_content(messages[0]["content"]) -%}
+    {%- endif -%}
+    {%- set messages = messages[1:] -%}
+{%- endif -%}
+{%- if tools -%}
+    {%- set ns.system_prompt = ns.system_prompt + ("\n" if ns.system_prompt else "") + "List of tools: [" -%}
+    {%- for tool in tools -%}
+        {%- if tool is not string -%}
+            {%- set tool = tool | tojson -%}
+        {%- endif -%}
+        {%- set ns.system_prompt = ns.system_prompt + tool -%}
+        {%- if not loop.last -%}
+            {%- set ns.system_prompt = ns.system_prompt + ", " -%}
+        {%- endif -%}
+    {%- endfor -%}
+    {%- set ns.system_prompt = ns.system_prompt + "]" -%}
+{%- endif -%}
+{%- if ns.system_prompt -%}
+    {{- "<|im_start|>system\n" + ns.system_prompt + "<|im_end|>\n" -}}
+{%- endif -%}
+{%- for message in messages -%}
+    {%- if message["role"] == "user" -%}
+        {%- set ns.last_user_index = loop.index0 -%}
+    {%- endif -%}
+{%- endfor -%}
+{%- for message in messages -%}
+    {{- "<|im_start|>" + message.role + "\n" -}}
+    {%- if message.role == "assistant" -%}
+        {%- generation -%}
+        {%- if message.thinking is defined and (preserve_thinking or loop.index0 > ns.last_user_index) -%}
+            {{- "<think>" + message.thinking + "</think>" -}}
+        {%- endif -%}
+        {%- set _cfm_tag = "CONTINUE_FINAL_MESSAGE_TAG " -%}
+        {%- set _has_cfm = false -%}
+        {%- if message.content is defined -%}
+            {%- set content = parse_content(message.content) -%}
+            {%- if not (preserve_thinking or loop.index0 > ns.last_user_index) -%}
+                {%- if "</think>" in content -%}
+                    {%- set content = content.split("</think>")[-1] | trim -%}
+                {%- endif -%}
+            {%- endif -%}
+            {%- if message.tool_calls is defined and content.endswith(_cfm_tag) -%}
+                {%- set _has_cfm = true -%}
+                {%- set _trunc_len = (content | length) - (_cfm_tag | length) -%}
+                {{- content[:_trunc_len] -}}
+            {%- else -%}
+                {{- content -}}
+            {%- endif -%}
+        {%- endif -%}
+        {%- if message.tool_calls is defined -%}
+            {{- render_tool_calls(message.tool_calls) -}}
+        {%- endif -%}
+        {%- if _has_cfm -%}
+            {{- _cfm_tag -}}
+        {%- endif -%}
+        {{- "<|im_end|>\n" -}}
+        {%- endgeneration -%}
+    {%- else %}
+        {%- if message.get("content") -%}
+            {{- parse_content(message["content"]) -}}
+        {%- endif -%}
+        {{- "<|im_end|>\n" -}}
+    {%- endif %}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+    {{- "<|im_start|>assistant\n" -}}
+{%- endif -%}
\ No newline at end of file
diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
index 3107045b4fc..46d77dac5d4 100644
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -1825,6 +1825,171 @@ static void test_convert_responses_to_chatcmpl() {
     }
 }
 
+// Shared LFM2 parser cases - all variants use one output format and parser
+static void test_lfm2_parser(const std::string & template_path, bool detailed_debug) {
+    auto tst = peg_tester(template_path, detailed_debug);
+
+    // Basic content only
+    tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+
+    // Single tool call without reasoning
+    tst.test("<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
+        .tools({ special_function_tool })
+        .expect(message_assist_call)
+        .run();
+
+    // Tool call with string argument
+    tst.test("<|tool_call_start|>[get_time(city=\"XYZCITY\")]<|tool_call_end|>")
+        .tools({ get_time_tool })
+        .expect(message_with_tool_calls("get_time", "{\"city\":\"XYZCITY\"}"))
+        .run();
+
+    // Python literals become JSON
+    tst.test("<|tool_call_start|>[toggle(enabled=True)]<|tool_call_end|>")
+        .tools({ toggle_tool })
+        .expect(message_with_tool_calls("toggle", R"({"enabled": true})"))
+        .run();
+
+    tst.test("<|tool_call_start|>[set_nullable(value=None)]<|tool_call_end|>")
+        .tools({ nullable_tool })
+        .expect(message_with_tool_calls("set_nullable", R"({"value": null})"))
+        .run();
+
+    // Nested Python literal
+    tst.test("<|tool_call_start|>[set_config(config={\"enabled\": True, \"count\": 3})]<|tool_call_end|>")
+        .tools({ config_tool })
+        .expect(message_with_tool_calls("set_config", R"({"config": {"enabled": true, "count": 3}})"))
+        .run();
+
+    // JSON literals are accepted too
+    tst.test("<|tool_call_start|>[set_config(config={\"enabled\": true, \"note\": null})]<|tool_call_end|>")
+        .tools({ config_tool })
+        .expect(message_with_tool_calls("set_config", R"({"config": {"enabled": true, "note": null}})"))
+        .run();
+
+    // Dotted function name with structured args
+    tst.test("<|tool_call_start|>[Calendar.create_event(title=\"demo\", participants=[\"Alice\", \"Bob\"], "
+             "metadata={\"priority\": \"high\", \"reminder\": true})]<|tool_call_end|>")
+        .tools({ calendar_create_event_tool })
+        .expect(message_with_tool_calls(
+            "Calendar.create_event",
+            R"({"title": "demo", "participants": ["Alice", "Bob"], "metadata": {"priority": "high", "reminder": true}})"))
+        .run();
+
+    // Markdown links stay content
+    tst.test("Use this format: [link text](url). Example: [Wikipedia](https://www.wikipedia.org).")
+        .tools({ get_time_tool })
+        .expect(simple_assist_msg("Use this format: [link text](url). Example: [Wikipedia](https://www.wikipedia.org)."))
+        .run();
+
+    // Python tool with multiline code in string
+    tst.test("<|tool_call_start|>[python(code=\"def hello():\\n    print('hey')\")]<|tool_call_end|>")
+        .tools({ python_tool })
+        .expect_tool_calls({
+            { "python", R"#({"code": "def hello():\\n    print('hey')"})#", "" }
+        })
+        .run();
+
+    // Tool call with reasoning (enable_thinking=true)
+    tst.test("<think>I'm\nthinking</think><|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
+        .enable_thinking(true)
+        .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+        .tools({ special_function_tool })
+        .expect(message_assist_call_thoughts)
+        .run();
+
+    // Tool call with reasoning and content
+    tst.test("<think>I need to call a function</think>"
+             "Let me check the time.<|tool_call_start|>[get_time(city=\"Paris\")]<|tool_call_end|>")
+        .enable_thinking(true)
+        .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+        .tools({ get_time_tool })
+        .expect(message_with_reasoning_content_and_multiple_tool_calls(
+            "I need to call a function", "Let me check the time.", { { "get_time", "{\"city\":\"Paris\"}" } }
+        ))
+        .run();
+
+    // Content before tool call (no reasoning)
+    tst.test("Let me check the time.<|tool_call_start|>[get_time(city=\"Paris\")]<|tool_call_end|>")
+        .tools({ get_time_tool })
+        .expect(message_with_reasoning_content_and_multiple_tool_calls(
+            "", "Let me check the time.", { { "get_time", "{\"city\":\"Paris\"}" } }
+        ))
+        .run();
+
+    // Multiple tool calls (parallel)
+    tst.test("<|tool_call_start|>[special_function(arg1=1), special_function_with_opt(arg1=1, arg2=2)]<|tool_call_end|>")
+        .parallel_tool_calls(true)
+        .tools({ special_function_tool, special_function_tool_with_optional_param })
+        .expect_tool_calls({
+            { "special_function", R"({"arg1": 1})", {} },
+            { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+        })
+        .run();
+
+    // Fake tool call marker inside reasoning is not parsed as a call
+    tst.test("<think>Let me think about <|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|> hmm</think>"
+             "<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
+        .enable_thinking(true)
+        .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+        .tools({ special_function_tool })
+        .expect_reasoning("Let me think about <|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|> hmm")
+        .expect_tool_calls({
+            { "special_function", R"({"arg1": 1})", {} },
+        })
+        .run();
+
+    // Thinking off, but model still emits <think>: drop it, keep only content
+    tst.test("<think>I'm\nthinking</think>Hello, world!\nWhat's up?")
+        .enable_thinking(false)
+        .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+        .expect(message_assist)
+        .run();
+
+    // Thinking off with tools: <think> dropped, tool call still parsed
+    tst.test("<think>I'm\nthinking</think><|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
+        .enable_thinking(false)
+        .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+        .tools({ special_function_tool })
+        .expect(message_assist_call)
+        .run();
+
+    // Partial tool call (streaming)
+    tst.test("<|tool_call_start|>[special_function(arg1=")
+        .tools({ special_function_tool })
+        .is_partial(true)
+        .expect(simple_assist_msg("", "", "special_function", "{\"arg1\": "))
+        .run();
+
+    // Tool call with empty arguments
+    tst.test("<|tool_call_start|>[empty_args()]<|tool_call_end|>")
+        .tools({ empty_args_tool })
+        .expect(simple_assist_msg("", "", "empty_args", "{}"))
+        .run();
+
+    // Continuation: prefill content
+    tst.test("world!\nWhat's up?")
+        .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+        .enable_thinking(true)
+        .messages({ message_user, message_assist_prefill_content })
+        .add_generation_prompt(false)
+        .continue_final_message(COMMON_CHAT_CONTINUATION_CONTENT)
+        .expect_reasoning("I'm thinking")
+        .expect_content("Hello, world!\nWhat's up?")
+        .run();
+
+    // Continuation: prefill reasoning
+    tst.test(" thinking</think>Hello, world!\nWhat's up?")
+        .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+        .enable_thinking(true)
+        .messages({ message_user, message_assist_prefill_reasoning })
+        .add_generation_prompt(false)
+        .continue_final_message(COMMON_CHAT_CONTINUATION_REASONING)
+        .expect_reasoning("I'm thinking")
+        .expect_content("Hello, world!\nWhat's up?")
+        .run();
+}
+
 static void test_template_output_peg_parsers(bool detailed_debug) {
     LOG_DBG("%s\n", __func__);
 
@@ -4038,230 +4203,12 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
             .run();
     }
 
-    // LFM2-8B-A1B tests - uses <|tool_list_start|>/<|tool_list_end|> and <|tool_call_start|>[name(args)]<|tool_call_end|>
-    {
-        auto tst = peg_tester("models/templates/LFM2-8B-A1B.jinja", detailed_debug);
-
-        // Basic content only
-        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
-
-        // Single tool call without reasoning
-        tst.test("<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
-            .tools({ special_function_tool })
-            .expect(message_assist_call)
-            .run();
-
-        // Tool call with string argument
-        tst.test("<|tool_call_start|>[get_time(city=\"XYZCITY\")]<|tool_call_end|>")
-            .tools({ get_time_tool })
-            .expect(message_with_tool_calls("get_time", "{\"city\":\"XYZCITY\"}"))
-            .run();
-
-        // Tool call with reasoning (enable_thinking=true)
-        tst.test("<think>I'm\nthinking</think><|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
-            .enable_thinking(true)
-            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
-            .tools({ special_function_tool })
-            .expect(message_assist_call_thoughts)
-            .run();
-
-        // Multiple tool calls (parallel)
-        tst.test("<|tool_call_start|>[special_function(arg1=1), special_function_with_opt(arg1=1, arg2=2)]<|tool_call_end|>")
-            .parallel_tool_calls(true)
-            .tools({
-                special_function_tool, special_function_tool_with_optional_param
-            })
-            .expect_tool_calls({
-                { "special_function", R"({"arg1": 1})", {} },
-                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
-            })
-            .run();
-
-        // Tool call with reasoning and content
-        tst.test("<think>I need to call a function</think>"
-                 "Let me check the time.<|tool_call_start|>[get_time(city=\"Paris\")]<|tool_call_end|>")
-            .enable_thinking(true)
-            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
-            .tools({ get_time_tool })
-            .expect(message_with_reasoning_content_and_multiple_tool_calls(
-                "I need to call a function", "Let me check the time.", { { "get_time", "{\"city\":\"Paris\"}" } }
-            ))
-            .run();
-
-        // Python tool with multiline code in string
-        tst.test("<|tool_call_start|>[python(code=\"def hello():\\n    print('hey')\")]<|tool_call_end|>")
-            .tools({ python_tool })
-            .expect_tool_calls({
-                { "python", R"#({"code": "def hello():\\n    print('hey')"})#", "" }
-            })
-            .run();
-
-        // Partial tool call (streaming)
-        tst.test("<|tool_call_start|>[special_function(arg1=")
-            .tools({ special_function_tool })
-            .is_partial(true)
-            .expect(simple_assist_msg("", "", "special_function", "{\"arg1\": "))
-            .run();
-
-        // Tool call with empty arguments
-        tst.test("<|tool_call_start|>[empty_args()]<|tool_call_end|>")
-            .tools({ empty_args_tool })
-            .expect(simple_assist_msg("", "", "empty_args", "{}"))
-            .run();
-
-        // fake tool call marker in reasoning
-        tst.test(
-               "<think>Let me think about <|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|> hmm</think>"
-               "<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
-            .enable_thinking(true)
-            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
-            .tools({ special_function_tool })
-            .expect_reasoning("Let me think about <|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|> hmm")
-            .expect_tool_calls({
-                { "special_function", R"({"arg1": 1})", {} },
-            })
-            .run();
-
-        // Continuation tests
-        tst.test("world!\nWhat's up?")
-            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
-            .enable_thinking(true)
-            .messages({ message_user, message_assist_prefill_content })
-            .add_generation_prompt(false)
-            .continue_final_message(COMMON_CHAT_CONTINUATION_CONTENT)
-            .expect_reasoning("I'm thinking")
-            .expect_content("Hello, world!\nWhat's up?")
-            .run();
-
-        tst.test(" thinking</think>Hello, world!\nWhat's up?")
-            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
-            .enable_thinking(true)
-            .messages({ message_user, message_assist_prefill_reasoning })
-            .add_generation_prompt(false)
-            .continue_final_message(COMMON_CHAT_CONTINUATION_REASONING)
-            .expect_reasoning("I'm thinking")
-            .expect_content("Hello, world!\nWhat's up?")
-            .run();
-    }
-
-    // LFM2.5 tests - format <|tool_call_start|>[name(args)]<|tool_call_end|>
-    {
-        auto tst = peg_tester("models/templates/LFM2.5-Instruct.jinja", detailed_debug);
-
-        // Basic content only
-        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
-
-        // Single tool call without reasoning
-        tst.test("<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
-            .tools({ special_function_tool })
-            .expect(message_assist_call)
-            .run();
-
-        // Tool call with string argument
-        tst.test("<|tool_call_start|>[get_time(city=\"XYZCITY\")]<|tool_call_end|>")
-            .tools({ get_time_tool })
-            .expect(message_with_tool_calls("get_time", "{\"city\":\"XYZCITY\"}"))
-            .run();
-
-        // Python literals become JSON.
-        tst.test("<|tool_call_start|>[toggle(enabled=True)]<|tool_call_end|>")
-            .tools({ toggle_tool })
-            .expect(message_with_tool_calls("toggle", R"({"enabled": true})"))
-            .run();
-
-        tst.test("<|tool_call_start|>[set_nullable(value=None)]<|tool_call_end|>")
-            .tools({ nullable_tool })
-            .expect(message_with_tool_calls("set_nullable", R"({"value": null})"))
-            .run();
-
-        // Nested Python literal.
-        tst.test("<|tool_call_start|>[set_config(config={\"enabled\": True, \"count\": 3})]<|tool_call_end|>")
-            .tools({ config_tool })
-            .expect(message_with_tool_calls("set_config", R"({"config": {"enabled": true, "count": 3}})"))
-            .run();
-
-        // JSON literals are accepted too.
-        tst.test("<|tool_call_start|>[set_config(config={\"enabled\": true, \"note\": null})]<|tool_call_end|>")
-            .tools({ config_tool })
-            .expect(message_with_tool_calls("set_config", R"({"config": {"enabled": true, "note": null}})"))
-            .run();
-
-        // Dotted function name with structured args.
-        tst.test("<|tool_call_start|>[Calendar.create_event(title=\"demo\", participants=[\"Alice\", \"Bob\"], "
-                 "metadata={\"priority\": \"high\", \"reminder\": true})]<|tool_call_end|>")
-            .tools({ calendar_create_event_tool })
-            .expect(message_with_tool_calls(
-                "Calendar.create_event",
-                R"({"title": "demo", "participants": ["Alice", "Bob"], "metadata": {"priority": "high", "reminder": true}})"))
-            .run();
-
-        // Markdown links stay content.
-        tst.test("Use this format: [link text](url). Example: [Wikipedia](https://www.wikipedia.org).")
-            .tools({ get_time_tool })
-            .expect(simple_assist_msg("Use this format: [link text](url). Example: [Wikipedia](https://www.wikipedia.org)."))
-            .run();
-
-        // Tool call with reasoning (enable_thinking=true)
-        tst.test("<think>I'm\nthinking</think><|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
-            .enable_thinking(true)
-            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
-            .tools({ special_function_tool })
-            .expect(message_assist_call_thoughts)
-            .run();
-
-        // Multiple tool calls (parallel)
-        tst.test("<|tool_call_start|>[special_function(arg1=1), special_function_with_opt(arg1=1, arg2=2)]<|tool_call_end|>")
-            .parallel_tool_calls(true)
-            .tools({
-                special_function_tool, special_function_tool_with_optional_param
-            })
-            .expect_tool_calls({
-                { "special_function", R"({"arg1": 1})", {} },
-                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
-            })
-            .run();
-
-        // Tool call with content before tool call
-        tst.test("Let me check the time.<|tool_call_start|>[get_time(city=\"Paris\")]<|tool_call_end|>")
-            .tools({ get_time_tool })
-            .expect(message_with_reasoning_content_and_multiple_tool_calls(
-                "", "Let me check the time.", { { "get_time", "{\"city\":\"Paris\"}" } }
-            ))
-            .run();
-
-        // Partial tool call (streaming)
-        tst.test("<|tool_call_start|>[special_function(arg1=")
-            .tools({ special_function_tool })
-            .is_partial(true)
-            .expect(simple_assist_msg("", "", "special_function", "{\"arg1\": "))
-            .run();
-
-        // Tool call with empty arguments
-        tst.test("<|tool_call_start|>[empty_args()]<|tool_call_end|>")
-            .tools({ empty_args_tool })
-            .expect(simple_assist_msg("", "", "empty_args", "{}"))
-            .run();
-
-        // Continuation tests
-        tst.test("world!\nWhat's up?")
-            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
-            .enable_thinking(true)
-            .messages({ message_user, message_assist_prefill_content })
-            .add_generation_prompt(false)
-            .continue_final_message(COMMON_CHAT_CONTINUATION_CONTENT)
-            .expect_reasoning("I'm thinking")
-            .expect_content("Hello, world!\nWhat's up?")
-            .run();
-
-        tst.test(" thinking</think>Hello, world!\nWhat's up?")
-            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
-            .enable_thinking(true)
-            .messages({ message_user, message_assist_prefill_reasoning })
-            .add_generation_prompt(false)
-            .continue_final_message(COMMON_CHAT_CONTINUATION_REASONING)
-            .expect_reasoning("I'm thinking")
-            .expect_content("Hello, world!\nWhat's up?")
-            .run();
+    for (const char * tmpl : {
+             "models/templates/LFM2-8B-A1B.jinja",
+             "models/templates/LFM2.5-Instruct.jinja",
+             "models/templates/LFM2.5-8B-A1B.jinja",
+         }) {
+        test_lfm2_parser(tmpl, detailed_debug);
     }
 
     // Reka-Edge tests - uses native JSON format with per-call wrapper
@@ -5478,18 +5425,25 @@ static void test_template_generation_prompt() {
         check(tmpls, continuation_reasoning(), "<|im_assistant|>assistant<|im_middle|><think>I'm");
     }
 
-    {
-        auto tmpls = read_templates("models/templates/LFM2-8B-A1B.jinja");
+    for (const char * tmpl : {
+             "models/templates/LFM2-8B-A1B.jinja",
+             "models/templates/LFM2.5-Instruct.jinja",
+             "models/templates/LFM2.5-8B-A1B.jinja",
+         }) {
+        auto tmpls = read_templates(tmpl);
         check(tmpls, basic(),                  "<|im_start|>assistant\n");
         check(tmpls, continuation_content(),   "<|im_start|>assistant\n<think>I'm thinking</think>Hello, ");
         check(tmpls, continuation_reasoning(), "<|im_start|>assistant\n<think>I'm");
     }
 
     {
-        auto tmpls = read_templates("models/templates/LFM2.5-Instruct.jinja");
-        check(tmpls, basic(),                  "<|im_start|>assistant\n");
-        check(tmpls, continuation_content(),   "<|im_start|>assistant\n<think>I'm thinking</think>Hello, ");
-        check(tmpls, continuation_reasoning(), "<|im_start|>assistant\n<think>I'm");
+        // 8B-A1B renders prior-turn reasoning via the "thinking" field
+        auto tmpls = read_templates("models/templates/LFM2.5-8B-A1B.jinja");
+        common_chat_templates_inputs inputs;
+        inputs.messages              = { message_user, message_assist_call_thoughts, tool_msg };
+        inputs.add_generation_prompt = true;
+        auto params = common_chat_templates_apply(tmpls.get(), inputs);
+        assert_contains(params.prompt, "<think>I'm\nthinking</think>");
     }
 
     {

From e9c51d926cf6f2b5f3edd1e2888095486b0fed20 Mon Sep 17 00:00:00 2001
From: Tarek Dakhran <tarek@liquid.ai>
Date: Sat, 6 Jun 2026 19:46:48 +0200
Subject: [PATCH 2/2] Gate by reasoning format and whether the template
 supports <think>

---
 common/chat.cpp     |   9 ++-
 tests/test-chat.cpp | 138 +++++++++++++++++++++++---------------------
 2 files changed, 75 insertions(+), 72 deletions(-)

diff --git a/common/chat.cpp b/common/chat.cpp
index 512cbebc841..24e58ab0640 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1648,7 +1648,9 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
     data.thinking_end_tag   = THINK_END;
 
     auto has_tools         = inputs.tools.is_array() && !inputs.tools.empty();
-    auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
+    // Extract reasoning only when a reasoning format is requested and the template source mentions <think>
+    auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE &&
+                             tmpl.source().find(THINK_START) != std::string::npos;
     auto include_grammar   = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
 
     if (inputs.has_continuation()) {
@@ -1667,11 +1669,8 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
         auto end = p.end();
 
         auto reasoning = p.eps();
-        if (extract_reasoning && inputs.enable_thinking) {
+        if (extract_reasoning) {
             reasoning = p.optional(THINK_START + p.reasoning(p.until(THINK_END)) + THINK_END);
-        } else if (extract_reasoning) {
-            // Thinking off, but model may still emit <think>, drop it
-            reasoning = p.optional(p.literal(THINK_START) + p.until(THINK_END) + p.literal(THINK_END));
         }
 
         if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
index 46d77dac5d4..c1be9eb5a99 100644
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -1890,25 +1890,6 @@ static void test_lfm2_parser(const std::string & template_path, bool detailed_de
         })
         .run();
 
-    // Tool call with reasoning (enable_thinking=true)
-    tst.test("<think>I'm\nthinking</think><|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
-        .enable_thinking(true)
-        .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
-        .tools({ special_function_tool })
-        .expect(message_assist_call_thoughts)
-        .run();
-
-    // Tool call with reasoning and content
-    tst.test("<think>I need to call a function</think>"
-             "Let me check the time.<|tool_call_start|>[get_time(city=\"Paris\")]<|tool_call_end|>")
-        .enable_thinking(true)
-        .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
-        .tools({ get_time_tool })
-        .expect(message_with_reasoning_content_and_multiple_tool_calls(
-            "I need to call a function", "Let me check the time.", { { "get_time", "{\"city\":\"Paris\"}" } }
-        ))
-        .run();
-
     // Content before tool call (no reasoning)
     tst.test("Let me check the time.<|tool_call_start|>[get_time(city=\"Paris\")]<|tool_call_end|>")
         .tools({ get_time_tool })
@@ -1927,33 +1908,6 @@ static void test_lfm2_parser(const std::string & template_path, bool detailed_de
         })
         .run();
 
-    // Fake tool call marker inside reasoning is not parsed as a call
-    tst.test("<think>Let me think about <|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|> hmm</think>"
-             "<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
-        .enable_thinking(true)
-        .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
-        .tools({ special_function_tool })
-        .expect_reasoning("Let me think about <|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|> hmm")
-        .expect_tool_calls({
-            { "special_function", R"({"arg1": 1})", {} },
-        })
-        .run();
-
-    // Thinking off, but model still emits <think>: drop it, keep only content
-    tst.test("<think>I'm\nthinking</think>Hello, world!\nWhat's up?")
-        .enable_thinking(false)
-        .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
-        .expect(message_assist)
-        .run();
-
-    // Thinking off with tools: <think> dropped, tool call still parsed
-    tst.test("<think>I'm\nthinking</think><|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
-        .enable_thinking(false)
-        .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
-        .tools({ special_function_tool })
-        .expect(message_assist_call)
-        .run();
-
     // Partial tool call (streaming)
     tst.test("<|tool_call_start|>[special_function(arg1=")
         .tools({ special_function_tool })
@@ -1967,27 +1921,6 @@ static void test_lfm2_parser(const std::string & template_path, bool detailed_de
         .expect(simple_assist_msg("", "", "empty_args", "{}"))
         .run();
 
-    // Continuation: prefill content
-    tst.test("world!\nWhat's up?")
-        .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
-        .enable_thinking(true)
-        .messages({ message_user, message_assist_prefill_content })
-        .add_generation_prompt(false)
-        .continue_final_message(COMMON_CHAT_CONTINUATION_CONTENT)
-        .expect_reasoning("I'm thinking")
-        .expect_content("Hello, world!\nWhat's up?")
-        .run();
-
-    // Continuation: prefill reasoning
-    tst.test(" thinking</think>Hello, world!\nWhat's up?")
-        .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
-        .enable_thinking(true)
-        .messages({ message_user, message_assist_prefill_reasoning })
-        .add_generation_prompt(false)
-        .continue_final_message(COMMON_CHAT_CONTINUATION_REASONING)
-        .expect_reasoning("I'm thinking")
-        .expect_content("Hello, world!\nWhat's up?")
-        .run();
 }
 
 static void test_template_output_peg_parsers(bool detailed_debug) {
@@ -4211,6 +4144,77 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
         test_lfm2_parser(tmpl, detailed_debug);
     }
 
+    // Thinking cases only apply to LFM2.5-8B-A1B, the one LFM2 template that emits <think>
+    {
+        auto tst = peg_tester("models/templates/LFM2.5-8B-A1B.jinja", detailed_debug);
+
+        // Reasoning is parsed independently of enable_thinking
+
+        // Tool call with reasoning
+        tst.test("<think>I'm\nthinking</think><|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call_thoughts)
+            .run();
+
+        // Tool call with reasoning and content
+        tst.test("<think>I need to call a function</think>"
+                 "Let me check the time.<|tool_call_start|>[get_time(city=\"Paris\")]<|tool_call_end|>")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ get_time_tool })
+            .expect(message_with_reasoning_content_and_multiple_tool_calls(
+                "I need to call a function", "Let me check the time.", { { "get_time", "{\"city\":\"Paris\"}" } }
+            ))
+            .run();
+
+        // Fake tool call marker inside reasoning is not parsed as a call
+        tst.test("<think>Let me think about <|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|> hmm</think>"
+                 "<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect_reasoning("Let me think about <|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|> hmm")
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", {} },
+            })
+            .run();
+
+        // Even with enable_thinking=false, emitted <think> reasoning is still captured (without and with tools)
+        tst.test("<think>I'm\nthinking</think>Hello, world!\nWhat's up?")
+            .enable_thinking(false)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect(message_assist_thoughts)
+            .run();
+
+        tst.test("<think>I'm\nthinking</think><|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
+            .enable_thinking(false)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call_thoughts)
+            .run();
+
+        // Continuation: prefill content
+        tst.test("world!\nWhat's up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .enable_thinking(true)
+            .messages({ message_user, message_assist_prefill_content })
+            .add_generation_prompt(false)
+            .continue_final_message(COMMON_CHAT_CONTINUATION_CONTENT)
+            .expect_reasoning("I'm thinking")
+            .expect_content("Hello, world!\nWhat's up?")
+            .run();
+
+        // Continuation: prefill reasoning
+        tst.test(" thinking</think>Hello, world!\nWhat's up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .enable_thinking(true)
+            .messages({ message_user, message_assist_prefill_reasoning })
+            .add_generation_prompt(false)
+            .continue_final_message(COMMON_CHAT_CONTINUATION_REASONING)
+            .expect_reasoning("I'm thinking")
+            .expect_content("Hello, world!\nWhat's up?")
+            .run();
+    }
+
     // Reka-Edge tests - uses native JSON format with per-call wrapper
     {
         auto tst = peg_tester("models/templates/Reka-Edge.jinja", detailed_debug);