From 477a2171e2e8300a4e83e0bd92a1a5efb0993e75 Mon Sep 17 00:00:00 2001 From: Tarek Dakhran Date: Sat, 6 Jun 2026 16:57:16 +0200 Subject: [PATCH 1/2] common/chat : fix LFM2 reasoning round-trip and stray leak --- common/chat.cpp | 16 +- models/templates/LFM2.5-8B-A1B.jinja | 115 ++++++++ tests/test-chat.cpp | 414 ++++++++++++--------------- 3 files changed, 313 insertions(+), 232 deletions(-) create mode 100644 models/templates/LFM2.5-8B-A1B.jinja diff --git a/common/chat.cpp b/common/chat.cpp index b8f248dab4e..512cbebc841 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -1625,8 +1625,17 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat const std::string THINK_END = ""; const std::string GEN_PROMPT = "<|im_start|>assistant\n"; - data.prompt = common_chat_template_direct_apply_impl(tmpl, inputs); - data.generation_prompt = common_chat_template_generation_prompt_impl(tmpl, inputs); + // Copy reasoning to the "thinking" field the template expects + auto adjusted_messages = json::array(); + for (auto msg : inputs.messages) { + if (msg.contains("reasoning_content") && msg.at("reasoning_content").is_string()) { + msg["thinking"] = msg.at("reasoning_content"); + } + adjusted_messages.push_back(msg); + } + + data.prompt = common_chat_template_direct_apply_impl(tmpl, inputs, adjusted_messages); + data.generation_prompt = common_chat_template_generation_prompt_impl(tmpl, inputs, adjusted_messages); data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; data.supports_thinking = true; data.preserved_tokens = { TOOL_CALL_START, TOOL_CALL_END, THINK_START, THINK_END }; @@ -1660,6 +1669,9 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat auto reasoning = p.eps(); if (extract_reasoning && inputs.enable_thinking) { reasoning = p.optional(THINK_START + p.reasoning(p.until(THINK_END)) + THINK_END); + } else if (extract_reasoning) { + // Thinking off, but model may still emit , drop it + reasoning = p.optional(p.literal(THINK_START) + p.until(THINK_END) + p.literal(THINK_END)); } if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) { diff --git a/models/templates/LFM2.5-8B-A1B.jinja b/models/templates/LFM2.5-8B-A1B.jinja new file mode 100644 index 00000000000..8bca4a545e9 --- /dev/null +++ b/models/templates/LFM2.5-8B-A1B.jinja @@ -0,0 +1,115 @@ +{{- bos_token -}} +{%- set preserve_thinking = preserve_thinking | default(false) -%} + +{%- macro format_arg_value(arg_value) -%} + {%- if arg_value is string -%} + {{- "'" + arg_value + "'" -}} + {%- elif arg_value is mapping -%} + {{- arg_value | tojson -}} + {%- else -%} + {{- arg_value | string -}} + {%- endif -%} +{%- endmacro -%} + +{%- macro parse_content(content) -%} + {%- if content is string -%} + {{- content -}} + {%- else -%} + {%- set _ns = namespace(result="") -%} + {%- for item in content -%} + {%- if item["type"] == "image" -%} + {%- set _ns.result = _ns.result + "" -%} + {%- elif item["type"] == "text" -%} + {%- set _ns.result = _ns.result + item["text"] -%} + {%- else -%} + {%- set _ns.result = _ns.result + item | tojson -%} + {%- endif -%} + {%- endfor -%} + {{- _ns.result -}} + {%- endif -%} +{%- endmacro -%} + +{%- macro render_tool_calls(tool_calls) -%} + {%- set tool_calls_ns = namespace(tool_calls=[]) -%} + {%- for tool_call in tool_calls -%} + {%- set func_name = tool_call["function"]["name"] -%} + {%- set func_args = tool_call["function"]["arguments"] -%} + {%- set args_ns = namespace(arg_strings=[]) -%} + {%- for arg_name, arg_value in func_args.items() -%} + {%- set args_ns.arg_strings = args_ns.arg_strings + [arg_name + "=" + format_arg_value(arg_value)] -%} + {%- endfor -%} + {%- set tool_calls_ns.tool_calls = tool_calls_ns.tool_calls + [func_name + "(" + (args_ns.arg_strings | join(", ")) + ")"] -%} + {%- endfor -%} + {{- "<|tool_call_start|>[" + (tool_calls_ns.tool_calls | join(", ")) + "]<|tool_call_end|>" -}} +{%- endmacro -%} + +{%- set ns = namespace(system_prompt="", last_user_index=-1) -%} +{%- if messages[0]["role"] == "system" -%} + {%- if messages[0].get("content") -%} + {%- set ns.system_prompt = parse_content(messages[0]["content"]) -%} + {%- endif -%} + {%- set messages = messages[1:] -%} +{%- endif -%} +{%- if tools -%} + {%- set ns.system_prompt = ns.system_prompt + ("\n" if ns.system_prompt else "") + "List of tools: [" -%} + {%- for tool in tools -%} + {%- if tool is not string -%} + {%- set tool = tool | tojson -%} + {%- endif -%} + {%- set ns.system_prompt = ns.system_prompt + tool -%} + {%- if not loop.last -%} + {%- set ns.system_prompt = ns.system_prompt + ", " -%} + {%- endif -%} + {%- endfor -%} + {%- set ns.system_prompt = ns.system_prompt + "]" -%} +{%- endif -%} +{%- if ns.system_prompt -%} + {{- "<|im_start|>system\n" + ns.system_prompt + "<|im_end|>\n" -}} +{%- endif -%} +{%- for message in messages -%} + {%- if message["role"] == "user" -%} + {%- set ns.last_user_index = loop.index0 -%} + {%- endif -%} +{%- endfor -%} +{%- for message in messages -%} + {{- "<|im_start|>" + message.role + "\n" -}} + {%- if message.role == "assistant" -%} + {%- generation -%} + {%- if message.thinking is defined and (preserve_thinking or loop.index0 > ns.last_user_index) -%} + {{- "" + message.thinking + "" -}} + {%- endif -%} + {%- set _cfm_tag = "CONTINUE_FINAL_MESSAGE_TAG " -%} + {%- set _has_cfm = false -%} + {%- if message.content is defined -%} + {%- set content = parse_content(message.content) -%} + {%- if not (preserve_thinking or loop.index0 > ns.last_user_index) -%} + {%- if "" in content -%} + {%- set content = content.split("")[-1] | trim -%} + {%- endif -%} + {%- endif -%} + {%- if message.tool_calls is defined and content.endswith(_cfm_tag) -%} + {%- set _has_cfm = true -%} + {%- set _trunc_len = (content | length) - (_cfm_tag | length) -%} + {{- content[:_trunc_len] -}} + {%- else -%} + {{- content -}} + {%- endif -%} + {%- endif -%} + {%- if message.tool_calls is defined -%} + {{- render_tool_calls(message.tool_calls) -}} + {%- endif -%} + {%- if _has_cfm -%} + {{- _cfm_tag -}} + {%- endif -%} + {{- "<|im_end|>\n" -}} + {%- endgeneration -%} + {%- else %} + {%- if message.get("content") -%} + {{- parse_content(message["content"]) -}} + {%- endif -%} + {{- "<|im_end|>\n" -}} + {%- endif %} +{%- endfor -%} +{%- if add_generation_prompt -%} + {{- "<|im_start|>assistant\n" -}} +{%- endif -%} \ No newline at end of file diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 3107045b4fc..46d77dac5d4 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -1825,6 +1825,171 @@ static void test_convert_responses_to_chatcmpl() { } } +// Shared LFM2 parser cases - all variants use one output format and parser +static void test_lfm2_parser(const std::string & template_path, bool detailed_debug) { + auto tst = peg_tester(template_path, detailed_debug); + + // Basic content only + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); + + // Single tool call without reasoning + tst.test("<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + + // Tool call with string argument + tst.test("<|tool_call_start|>[get_time(city=\"XYZCITY\")]<|tool_call_end|>") + .tools({ get_time_tool }) + .expect(message_with_tool_calls("get_time", "{\"city\":\"XYZCITY\"}")) + .run(); + + // Python literals become JSON + tst.test("<|tool_call_start|>[toggle(enabled=True)]<|tool_call_end|>") + .tools({ toggle_tool }) + .expect(message_with_tool_calls("toggle", R"({"enabled": true})")) + .run(); + + tst.test("<|tool_call_start|>[set_nullable(value=None)]<|tool_call_end|>") + .tools({ nullable_tool }) + .expect(message_with_tool_calls("set_nullable", R"({"value": null})")) + .run(); + + // Nested Python literal + tst.test("<|tool_call_start|>[set_config(config={\"enabled\": True, \"count\": 3})]<|tool_call_end|>") + .tools({ config_tool }) + .expect(message_with_tool_calls("set_config", R"({"config": {"enabled": true, "count": 3}})")) + .run(); + + // JSON literals are accepted too + tst.test("<|tool_call_start|>[set_config(config={\"enabled\": true, \"note\": null})]<|tool_call_end|>") + .tools({ config_tool }) + .expect(message_with_tool_calls("set_config", R"({"config": {"enabled": true, "note": null}})")) + .run(); + + // Dotted function name with structured args + tst.test("<|tool_call_start|>[Calendar.create_event(title=\"demo\", participants=[\"Alice\", \"Bob\"], " + "metadata={\"priority\": \"high\", \"reminder\": true})]<|tool_call_end|>") + .tools({ calendar_create_event_tool }) + .expect(message_with_tool_calls( + "Calendar.create_event", + R"({"title": "demo", "participants": ["Alice", "Bob"], "metadata": {"priority": "high", "reminder": true}})")) + .run(); + + // Markdown links stay content + tst.test("Use this format: [link text](url). Example: [Wikipedia](https://www.wikipedia.org).") + .tools({ get_time_tool }) + .expect(simple_assist_msg("Use this format: [link text](url). Example: [Wikipedia](https://www.wikipedia.org).")) + .run(); + + // Python tool with multiline code in string + tst.test("<|tool_call_start|>[python(code=\"def hello():\\n print('hey')\")]<|tool_call_end|>") + .tools({ python_tool }) + .expect_tool_calls({ + { "python", R"#({"code": "def hello():\\n print('hey')"})#", "" } + }) + .run(); + + // Tool call with reasoning (enable_thinking=true) + tst.test("I'm\nthinking<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ special_function_tool }) + .expect(message_assist_call_thoughts) + .run(); + + // Tool call with reasoning and content + tst.test("I need to call a function" + "Let me check the time.<|tool_call_start|>[get_time(city=\"Paris\")]<|tool_call_end|>") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ get_time_tool }) + .expect(message_with_reasoning_content_and_multiple_tool_calls( + "I need to call a function", "Let me check the time.", { { "get_time", "{\"city\":\"Paris\"}" } } + )) + .run(); + + // Content before tool call (no reasoning) + tst.test("Let me check the time.<|tool_call_start|>[get_time(city=\"Paris\")]<|tool_call_end|>") + .tools({ get_time_tool }) + .expect(message_with_reasoning_content_and_multiple_tool_calls( + "", "Let me check the time.", { { "get_time", "{\"city\":\"Paris\"}" } } + )) + .run(); + + // Multiple tool calls (parallel) + tst.test("<|tool_call_start|>[special_function(arg1=1), special_function_with_opt(arg1=1, arg2=2)]<|tool_call_end|>") + .parallel_tool_calls(true) + .tools({ special_function_tool, special_function_tool_with_optional_param }) + .expect_tool_calls({ + { "special_function", R"({"arg1": 1})", {} }, + { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} }, + }) + .run(); + + // Fake tool call marker inside reasoning is not parsed as a call + tst.test("Let me think about <|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|> hmm" + "<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ special_function_tool }) + .expect_reasoning("Let me think about <|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|> hmm") + .expect_tool_calls({ + { "special_function", R"({"arg1": 1})", {} }, + }) + .run(); + + // Thinking off, but model still emits : drop it, keep only content + tst.test("I'm\nthinkingHello, world!\nWhat's up?") + .enable_thinking(false) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .expect(message_assist) + .run(); + + // Thinking off with tools: dropped, tool call still parsed + tst.test("I'm\nthinking<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>") + .enable_thinking(false) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + + // Partial tool call (streaming) + tst.test("<|tool_call_start|>[special_function(arg1=") + .tools({ special_function_tool }) + .is_partial(true) + .expect(simple_assist_msg("", "", "special_function", "{\"arg1\": ")) + .run(); + + // Tool call with empty arguments + tst.test("<|tool_call_start|>[empty_args()]<|tool_call_end|>") + .tools({ empty_args_tool }) + .expect(simple_assist_msg("", "", "empty_args", "{}")) + .run(); + + // Continuation: prefill content + tst.test("world!\nWhat's up?") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .enable_thinking(true) + .messages({ message_user, message_assist_prefill_content }) + .add_generation_prompt(false) + .continue_final_message(COMMON_CHAT_CONTINUATION_CONTENT) + .expect_reasoning("I'm thinking") + .expect_content("Hello, world!\nWhat's up?") + .run(); + + // Continuation: prefill reasoning + tst.test(" thinkingHello, world!\nWhat's up?") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .enable_thinking(true) + .messages({ message_user, message_assist_prefill_reasoning }) + .add_generation_prompt(false) + .continue_final_message(COMMON_CHAT_CONTINUATION_REASONING) + .expect_reasoning("I'm thinking") + .expect_content("Hello, world!\nWhat's up?") + .run(); +} + static void test_template_output_peg_parsers(bool detailed_debug) { LOG_DBG("%s\n", __func__); @@ -4038,230 +4203,12 @@ static void test_template_output_peg_parsers(bool detailed_debug) { .run(); } - // LFM2-8B-A1B tests - uses <|tool_list_start|>/<|tool_list_end|> and <|tool_call_start|>[name(args)]<|tool_call_end|> - { - auto tst = peg_tester("models/templates/LFM2-8B-A1B.jinja", detailed_debug); - - // Basic content only - tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); - - // Single tool call without reasoning - tst.test("<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>") - .tools({ special_function_tool }) - .expect(message_assist_call) - .run(); - - // Tool call with string argument - tst.test("<|tool_call_start|>[get_time(city=\"XYZCITY\")]<|tool_call_end|>") - .tools({ get_time_tool }) - .expect(message_with_tool_calls("get_time", "{\"city\":\"XYZCITY\"}")) - .run(); - - // Tool call with reasoning (enable_thinking=true) - tst.test("I'm\nthinking<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>") - .enable_thinking(true) - .reasoning_format(COMMON_REASONING_FORMAT_AUTO) - .tools({ special_function_tool }) - .expect(message_assist_call_thoughts) - .run(); - - // Multiple tool calls (parallel) - tst.test("<|tool_call_start|>[special_function(arg1=1), special_function_with_opt(arg1=1, arg2=2)]<|tool_call_end|>") - .parallel_tool_calls(true) - .tools({ - special_function_tool, special_function_tool_with_optional_param - }) - .expect_tool_calls({ - { "special_function", R"({"arg1": 1})", {} }, - { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} }, - }) - .run(); - - // Tool call with reasoning and content - tst.test("I need to call a function" - "Let me check the time.<|tool_call_start|>[get_time(city=\"Paris\")]<|tool_call_end|>") - .enable_thinking(true) - .reasoning_format(COMMON_REASONING_FORMAT_AUTO) - .tools({ get_time_tool }) - .expect(message_with_reasoning_content_and_multiple_tool_calls( - "I need to call a function", "Let me check the time.", { { "get_time", "{\"city\":\"Paris\"}" } } - )) - .run(); - - // Python tool with multiline code in string - tst.test("<|tool_call_start|>[python(code=\"def hello():\\n print('hey')\")]<|tool_call_end|>") - .tools({ python_tool }) - .expect_tool_calls({ - { "python", R"#({"code": "def hello():\\n print('hey')"})#", "" } - }) - .run(); - - // Partial tool call (streaming) - tst.test("<|tool_call_start|>[special_function(arg1=") - .tools({ special_function_tool }) - .is_partial(true) - .expect(simple_assist_msg("", "", "special_function", "{\"arg1\": ")) - .run(); - - // Tool call with empty arguments - tst.test("<|tool_call_start|>[empty_args()]<|tool_call_end|>") - .tools({ empty_args_tool }) - .expect(simple_assist_msg("", "", "empty_args", "{}")) - .run(); - - // fake tool call marker in reasoning - tst.test( - "Let me think about <|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|> hmm" - "<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>") - .enable_thinking(true) - .reasoning_format(COMMON_REASONING_FORMAT_AUTO) - .tools({ special_function_tool }) - .expect_reasoning("Let me think about <|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|> hmm") - .expect_tool_calls({ - { "special_function", R"({"arg1": 1})", {} }, - }) - .run(); - - // Continuation tests - tst.test("world!\nWhat's up?") - .reasoning_format(COMMON_REASONING_FORMAT_AUTO) - .enable_thinking(true) - .messages({ message_user, message_assist_prefill_content }) - .add_generation_prompt(false) - .continue_final_message(COMMON_CHAT_CONTINUATION_CONTENT) - .expect_reasoning("I'm thinking") - .expect_content("Hello, world!\nWhat's up?") - .run(); - - tst.test(" thinkingHello, world!\nWhat's up?") - .reasoning_format(COMMON_REASONING_FORMAT_AUTO) - .enable_thinking(true) - .messages({ message_user, message_assist_prefill_reasoning }) - .add_generation_prompt(false) - .continue_final_message(COMMON_CHAT_CONTINUATION_REASONING) - .expect_reasoning("I'm thinking") - .expect_content("Hello, world!\nWhat's up?") - .run(); - } - - // LFM2.5 tests - format <|tool_call_start|>[name(args)]<|tool_call_end|> - { - auto tst = peg_tester("models/templates/LFM2.5-Instruct.jinja", detailed_debug); - - // Basic content only - tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); - - // Single tool call without reasoning - tst.test("<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>") - .tools({ special_function_tool }) - .expect(message_assist_call) - .run(); - - // Tool call with string argument - tst.test("<|tool_call_start|>[get_time(city=\"XYZCITY\")]<|tool_call_end|>") - .tools({ get_time_tool }) - .expect(message_with_tool_calls("get_time", "{\"city\":\"XYZCITY\"}")) - .run(); - - // Python literals become JSON. - tst.test("<|tool_call_start|>[toggle(enabled=True)]<|tool_call_end|>") - .tools({ toggle_tool }) - .expect(message_with_tool_calls("toggle", R"({"enabled": true})")) - .run(); - - tst.test("<|tool_call_start|>[set_nullable(value=None)]<|tool_call_end|>") - .tools({ nullable_tool }) - .expect(message_with_tool_calls("set_nullable", R"({"value": null})")) - .run(); - - // Nested Python literal. - tst.test("<|tool_call_start|>[set_config(config={\"enabled\": True, \"count\": 3})]<|tool_call_end|>") - .tools({ config_tool }) - .expect(message_with_tool_calls("set_config", R"({"config": {"enabled": true, "count": 3}})")) - .run(); - - // JSON literals are accepted too. - tst.test("<|tool_call_start|>[set_config(config={\"enabled\": true, \"note\": null})]<|tool_call_end|>") - .tools({ config_tool }) - .expect(message_with_tool_calls("set_config", R"({"config": {"enabled": true, "note": null}})")) - .run(); - - // Dotted function name with structured args. - tst.test("<|tool_call_start|>[Calendar.create_event(title=\"demo\", participants=[\"Alice\", \"Bob\"], " - "metadata={\"priority\": \"high\", \"reminder\": true})]<|tool_call_end|>") - .tools({ calendar_create_event_tool }) - .expect(message_with_tool_calls( - "Calendar.create_event", - R"({"title": "demo", "participants": ["Alice", "Bob"], "metadata": {"priority": "high", "reminder": true}})")) - .run(); - - // Markdown links stay content. - tst.test("Use this format: [link text](url). Example: [Wikipedia](https://www.wikipedia.org).") - .tools({ get_time_tool }) - .expect(simple_assist_msg("Use this format: [link text](url). Example: [Wikipedia](https://www.wikipedia.org).")) - .run(); - - // Tool call with reasoning (enable_thinking=true) - tst.test("I'm\nthinking<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>") - .enable_thinking(true) - .reasoning_format(COMMON_REASONING_FORMAT_AUTO) - .tools({ special_function_tool }) - .expect(message_assist_call_thoughts) - .run(); - - // Multiple tool calls (parallel) - tst.test("<|tool_call_start|>[special_function(arg1=1), special_function_with_opt(arg1=1, arg2=2)]<|tool_call_end|>") - .parallel_tool_calls(true) - .tools({ - special_function_tool, special_function_tool_with_optional_param - }) - .expect_tool_calls({ - { "special_function", R"({"arg1": 1})", {} }, - { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} }, - }) - .run(); - - // Tool call with content before tool call - tst.test("Let me check the time.<|tool_call_start|>[get_time(city=\"Paris\")]<|tool_call_end|>") - .tools({ get_time_tool }) - .expect(message_with_reasoning_content_and_multiple_tool_calls( - "", "Let me check the time.", { { "get_time", "{\"city\":\"Paris\"}" } } - )) - .run(); - - // Partial tool call (streaming) - tst.test("<|tool_call_start|>[special_function(arg1=") - .tools({ special_function_tool }) - .is_partial(true) - .expect(simple_assist_msg("", "", "special_function", "{\"arg1\": ")) - .run(); - - // Tool call with empty arguments - tst.test("<|tool_call_start|>[empty_args()]<|tool_call_end|>") - .tools({ empty_args_tool }) - .expect(simple_assist_msg("", "", "empty_args", "{}")) - .run(); - - // Continuation tests - tst.test("world!\nWhat's up?") - .reasoning_format(COMMON_REASONING_FORMAT_AUTO) - .enable_thinking(true) - .messages({ message_user, message_assist_prefill_content }) - .add_generation_prompt(false) - .continue_final_message(COMMON_CHAT_CONTINUATION_CONTENT) - .expect_reasoning("I'm thinking") - .expect_content("Hello, world!\nWhat's up?") - .run(); - - tst.test(" thinkingHello, world!\nWhat's up?") - .reasoning_format(COMMON_REASONING_FORMAT_AUTO) - .enable_thinking(true) - .messages({ message_user, message_assist_prefill_reasoning }) - .add_generation_prompt(false) - .continue_final_message(COMMON_CHAT_CONTINUATION_REASONING) - .expect_reasoning("I'm thinking") - .expect_content("Hello, world!\nWhat's up?") - .run(); + for (const char * tmpl : { + "models/templates/LFM2-8B-A1B.jinja", + "models/templates/LFM2.5-Instruct.jinja", + "models/templates/LFM2.5-8B-A1B.jinja", + }) { + test_lfm2_parser(tmpl, detailed_debug); } // Reka-Edge tests - uses native JSON format with per-call wrapper @@ -5478,18 +5425,25 @@ static void test_template_generation_prompt() { check(tmpls, continuation_reasoning(), "<|im_assistant|>assistant<|im_middle|>I'm"); } - { - auto tmpls = read_templates("models/templates/LFM2-8B-A1B.jinja"); + for (const char * tmpl : { + "models/templates/LFM2-8B-A1B.jinja", + "models/templates/LFM2.5-Instruct.jinja", + "models/templates/LFM2.5-8B-A1B.jinja", + }) { + auto tmpls = read_templates(tmpl); check(tmpls, basic(), "<|im_start|>assistant\n"); check(tmpls, continuation_content(), "<|im_start|>assistant\nI'm thinkingHello, "); check(tmpls, continuation_reasoning(), "<|im_start|>assistant\nI'm"); } { - auto tmpls = read_templates("models/templates/LFM2.5-Instruct.jinja"); - check(tmpls, basic(), "<|im_start|>assistant\n"); - check(tmpls, continuation_content(), "<|im_start|>assistant\nI'm thinkingHello, "); - check(tmpls, continuation_reasoning(), "<|im_start|>assistant\nI'm"); + // 8B-A1B renders prior-turn reasoning via the "thinking" field + auto tmpls = read_templates("models/templates/LFM2.5-8B-A1B.jinja"); + common_chat_templates_inputs inputs; + inputs.messages = { message_user, message_assist_call_thoughts, tool_msg }; + inputs.add_generation_prompt = true; + auto params = common_chat_templates_apply(tmpls.get(), inputs); + assert_contains(params.prompt, "I'm\nthinking"); } { From e9c51d926cf6f2b5f3edd1e2888095486b0fed20 Mon Sep 17 00:00:00 2001 From: Tarek Dakhran Date: Sat, 6 Jun 2026 19:46:48 +0200 Subject: [PATCH 2/2] Gate by reasoning format and whether the template supports --- common/chat.cpp | 9 ++- tests/test-chat.cpp | 138 +++++++++++++++++++++++--------------------- 2 files changed, 75 insertions(+), 72 deletions(-) diff --git a/common/chat.cpp b/common/chat.cpp index 512cbebc841..24e58ab0640 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -1648,7 +1648,9 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat data.thinking_end_tag = THINK_END; auto has_tools = inputs.tools.is_array() && !inputs.tools.empty(); - auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE; + // Gate by reasoning format and whether the template supports + auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE && + tmpl.source().find(THINK_START) != std::string::npos; auto include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE; if (inputs.has_continuation()) { @@ -1667,11 +1669,8 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat auto end = p.end(); auto reasoning = p.eps(); - if (extract_reasoning && inputs.enable_thinking) { + if (extract_reasoning) { reasoning = p.optional(THINK_START + p.reasoning(p.until(THINK_END)) + THINK_END); - } else if (extract_reasoning) { - // Thinking off, but model may still emit , drop it - reasoning = p.optional(p.literal(THINK_START) + p.until(THINK_END) + p.literal(THINK_END)); } if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) { diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 46d77dac5d4..c1be9eb5a99 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -1890,25 +1890,6 @@ static void test_lfm2_parser(const std::string & template_path, bool detailed_de }) .run(); - // Tool call with reasoning (enable_thinking=true) - tst.test("I'm\nthinking<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>") - .enable_thinking(true) - .reasoning_format(COMMON_REASONING_FORMAT_AUTO) - .tools({ special_function_tool }) - .expect(message_assist_call_thoughts) - .run(); - - // Tool call with reasoning and content - tst.test("I need to call a function" - "Let me check the time.<|tool_call_start|>[get_time(city=\"Paris\")]<|tool_call_end|>") - .enable_thinking(true) - .reasoning_format(COMMON_REASONING_FORMAT_AUTO) - .tools({ get_time_tool }) - .expect(message_with_reasoning_content_and_multiple_tool_calls( - "I need to call a function", "Let me check the time.", { { "get_time", "{\"city\":\"Paris\"}" } } - )) - .run(); - // Content before tool call (no reasoning) tst.test("Let me check the time.<|tool_call_start|>[get_time(city=\"Paris\")]<|tool_call_end|>") .tools({ get_time_tool }) @@ -1927,33 +1908,6 @@ static void test_lfm2_parser(const std::string & template_path, bool detailed_de }) .run(); - // Fake tool call marker inside reasoning is not parsed as a call - tst.test("Let me think about <|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|> hmm" - "<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>") - .enable_thinking(true) - .reasoning_format(COMMON_REASONING_FORMAT_AUTO) - .tools({ special_function_tool }) - .expect_reasoning("Let me think about <|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|> hmm") - .expect_tool_calls({ - { "special_function", R"({"arg1": 1})", {} }, - }) - .run(); - - // Thinking off, but model still emits : drop it, keep only content - tst.test("I'm\nthinkingHello, world!\nWhat's up?") - .enable_thinking(false) - .reasoning_format(COMMON_REASONING_FORMAT_AUTO) - .expect(message_assist) - .run(); - - // Thinking off with tools: dropped, tool call still parsed - tst.test("I'm\nthinking<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>") - .enable_thinking(false) - .reasoning_format(COMMON_REASONING_FORMAT_AUTO) - .tools({ special_function_tool }) - .expect(message_assist_call) - .run(); - // Partial tool call (streaming) tst.test("<|tool_call_start|>[special_function(arg1=") .tools({ special_function_tool }) @@ -1967,27 +1921,6 @@ static void test_lfm2_parser(const std::string & template_path, bool detailed_de .expect(simple_assist_msg("", "", "empty_args", "{}")) .run(); - // Continuation: prefill content - tst.test("world!\nWhat's up?") - .reasoning_format(COMMON_REASONING_FORMAT_AUTO) - .enable_thinking(true) - .messages({ message_user, message_assist_prefill_content }) - .add_generation_prompt(false) - .continue_final_message(COMMON_CHAT_CONTINUATION_CONTENT) - .expect_reasoning("I'm thinking") - .expect_content("Hello, world!\nWhat's up?") - .run(); - - // Continuation: prefill reasoning - tst.test(" thinkingHello, world!\nWhat's up?") - .reasoning_format(COMMON_REASONING_FORMAT_AUTO) - .enable_thinking(true) - .messages({ message_user, message_assist_prefill_reasoning }) - .add_generation_prompt(false) - .continue_final_message(COMMON_CHAT_CONTINUATION_REASONING) - .expect_reasoning("I'm thinking") - .expect_content("Hello, world!\nWhat's up?") - .run(); } static void test_template_output_peg_parsers(bool detailed_debug) { @@ -4211,6 +4144,77 @@ static void test_template_output_peg_parsers(bool detailed_debug) { test_lfm2_parser(tmpl, detailed_debug); } + // Thinking cases only apply to LFM2.5-8B-A1B, the one LFM2 template that emits + { + auto tst = peg_tester("models/templates/LFM2.5-8B-A1B.jinja", detailed_debug); + + // Reasoning is parsed independent of enable_thinking + + // Tool call with reasoning + tst.test("I'm\nthinking<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ special_function_tool }) + .expect(message_assist_call_thoughts) + .run(); + + // Tool call with reasoning and content + tst.test("I need to call a function" + "Let me check the time.<|tool_call_start|>[get_time(city=\"Paris\")]<|tool_call_end|>") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ get_time_tool }) + .expect(message_with_reasoning_content_and_multiple_tool_calls( + "I need to call a function", "Let me check the time.", { { "get_time", "{\"city\":\"Paris\"}" } } + )) + .run(); + + // Fake tool call marker inside reasoning is not parsed as a call + tst.test("Let me think about <|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|> hmm" + "<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ special_function_tool }) + .expect_reasoning("Let me think about <|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|> hmm") + .expect_tool_calls({ + { "special_function", R"({"arg1": 1})", {} }, + }) + .run(); + + // enable_thinking=false still captures emitted reasoning + tst.test("I'm\nthinkingHello, world!\nWhat's up?") + .enable_thinking(false) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .expect(message_assist_thoughts) + .run(); + + tst.test("I'm\nthinking<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>") + .enable_thinking(false) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ special_function_tool }) + .expect(message_assist_call_thoughts) + .run(); + + // Continuation: prefill content + tst.test("world!\nWhat's up?") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .enable_thinking(true) + .messages({ message_user, message_assist_prefill_content }) + .add_generation_prompt(false) + .continue_final_message(COMMON_CHAT_CONTINUATION_CONTENT) + .expect_reasoning("I'm thinking") + .expect_content("Hello, world!\nWhat's up?") + .run(); + + // Continuation: prefill reasoning + tst.test(" thinkingHello, world!\nWhat's up?") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .enable_thinking(true) + .messages({ message_user, message_assist_prefill_reasoning }) + .add_generation_prompt(false) + .continue_final_message(COMMON_CHAT_CONTINUATION_REASONING) + .expect_reasoning("I'm thinking") + .expect_content("Hello, world!\nWhat's up?") + .run(); + } + // Reka-Edge tests - uses native JSON format with per-call wrapper { auto tst = peg_tester("models/templates/Reka-Edge.jinja", detailed_debug);