From 63f4032c098191fee0fe129c3a9440b19eaa806e Mon Sep 17 00:00:00 2001 From: ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com> Date: Fri, 5 Sep 2025 01:24:08 +0200 Subject: [PATCH 1/4] chat : fixed crash when Hermes 2 had a newline before it (#15639) Co-authored-by: CNE Pierre FICHEPOIL --- common/chat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/chat.cpp b/common/chat.cpp index 962245b5e..2bb695c14 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -1833,7 +1833,7 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat // If thinking_forced_open, then we capture the tag in the grammar, // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar) std::string(data.thinking_forced_open ? "[\\s\\S]*?(\\s*)" : "(?:[\\s\\S]*?\\s*)?") + ( - "(\\s*" + "\\s*(" "(?:" "||||)?" From 238c7254ceab668a2d8dc2d6cbd3245a136080ee Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Fri, 5 Sep 2025 14:31:24 -0600 Subject: [PATCH 2/4] Thinking model disabled assistant prefill (#15404) * feat: Set enable_thinking IFF not disabled and supported Branch: gabe-l-hart/thinking-model-disabled-agent-prefill Signed-off-by: Gabe Goodhart * fix: Fix inverted logic condition for prefill error Branch: gabe-l-hart/thinking-model-disabled-agent-prefill Signed-off-by: Gabe Goodhart * fix: Always parse the enable_thinking kwarg to overwrite the default value From what I can tell, this started as a Qwen3-specific keyword, but from the use in `chat.cpp` translates this inputs.enable_thinking to the right thinking kwarg for the given model, this is now more of a standardized kwarg, so it should always override the default value when sent as part of the chat_template_kwargs field in the API. Branch: gabe-l-hart/thinking-model-disabled-agent-prefill Signed-off-by: Gabe Goodhart * fix: Don't limit tempalte expansion check to jinja With the use_jinja check, non-jinja models would enable thinking and always fail assistant prefill Branch: gabe-l-hart/thinking-model-disabled-agent-prefill Signed-off-by: Gabe Goodhart * feat: Add the error text to json type errors in json_value Branch: gabe-l-hart/thinking-model-disabled-agent-prefill Signed-off-by: Gabe Goodhart * feat: Explicitly reject string values for "enable_thinking" There are too many possible "truthy" / "falsy" strings and too many ambiguous strings that don't have a clear truthy/falsy value, so the simplest thing to do here is to reject the request. Ideally, this would be a 422 (Unprocessable Entity), but right now it's coming back as a 500. Branch: gabe-l-hart/thinking-model-disabled-agent-prefill Signed-off-by: Gabe Goodhart * refactor: Move logic for detecting template enable_thinking support to common Branch: gabe-l-hart/thinking-model-disabled-agent-prefill Signed-off-by: Gabe Goodhart * fix: Use raw pointer for common chat template function Branch: gabe-l-hart/thinking-model-disabled-agent-prefill Signed-off-by: Gabe Goodhart --------- Signed-off-by: Gabe Goodhart # Conflicts: # tools/server/server.cpp # tools/server/utils.hpp --- common/chat.cpp | 13 +++++++++++++ common/chat.h | 2 ++ examples/server/server.cpp | 9 ++++++++- examples/server/utils.hpp | 18 +++++++++++++++--- 4 files changed, 38 insertions(+), 4 deletions(-) diff --git a/common/chat.cpp b/common/chat.cpp index 2bb695c14..f4817a55c 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -165,6 +165,19 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin throw std::runtime_error("Invalid tool_choice: " + tool_choice); } +bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates) { + common_chat_templates_inputs dummy_inputs; + common_chat_msg msg; + msg.role = "user"; + msg.content = "test"; + dummy_inputs.messages = {msg}; + dummy_inputs.enable_thinking = false; + const auto rendered_no_thinking = common_chat_templates_apply(chat_templates, dummy_inputs); + dummy_inputs.enable_thinking = true; + const auto rendered_with_thinking = common_chat_templates_apply(chat_templates, dummy_inputs); + return rendered_no_thinking.prompt != rendered_with_thinking.prompt; +} + template <> std::vector common_chat_msgs_parse_oaicompat(const json & messages) { std::vector msgs; diff --git a/common/chat.h b/common/chat.h index 33553e3a9..05fe75be1 100644 --- a/common/chat.h +++ b/common/chat.h @@ -196,6 +196,8 @@ common_chat_msg common_chat_parse(const std::string & input, bool is_p common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice); +bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates); + // Parses a JSON array of messages in OpenAI's chat completion API format. // T can be std::string containing JSON or nlohmann::ordered_json template std::vector common_chat_msgs_parse_oaicompat(const T & messages); diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 88f65c24a..aa9ab147c 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1355,6 +1355,13 @@ struct server_context { } metrics.init(); + + // thinking is enabled if: + // 1. It's not explicitly disabled (reasoning_budget == 0) + // 2. The chat template supports it + const bool enable_thinking = params.reasoning_budget != 0 && common_chat_templates_support_enable_thinking(chat_templates.get()); + LLAMA_LOG_INFO("Enable thinking? %d\n", enable_thinking); + oai_parser_opt = { /* use_jinja */ params.use_jinja, /* prefill_assistant */ params.prefill_assistant, @@ -1363,7 +1370,7 @@ struct server_context { /* common_chat_templates */ chat_templates.get(), /* allow_image */ false, /* allow_audio */ false, - /* enable_thinking */ params.reasoning_budget != 0, + /* enable_thinking */ enable_thinking, }; } diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp index 479eacdd5..5efa9cdd0 100644 --- a/examples/server/utils.hpp +++ b/examples/server/utils.hpp @@ -59,9 +59,9 @@ static T json_value(const json & body, const std::string & key, const T & defaul if (body.contains(key) && !body.at(key).is_null()) { try { return body.at(key); - } catch (NLOHMANN_JSON_NAMESPACE::detail::type_error const &) { + } catch (NLOHMANN_JSON_NAMESPACE::detail::type_error const& err) { std::stringstream ss; - ss << "Wrong type supplied for parameter '" << key << "'. Expected '" << json(default_value).type_name() << "', using default value."; + ss << "Wrong type supplied for parameter '" << key << "'. Expected '" << json(default_value).type_name() << "', using default value: "<< err.what(); LOG_WARNING(ss.str().c_str(), body); return default_value; } @@ -557,6 +557,18 @@ static json oaicompat_chat_params_parse( inputs.chat_template_kwargs[item.key()] = item.value().dump(); } + // parse the "enable_thinking" kwarg to override the default value + auto enable_thinking_kwarg = json_value(inputs.chat_template_kwargs, "enable_thinking", std::string("")); + if (enable_thinking_kwarg == "true") { + inputs.enable_thinking = true; + } + else if (enable_thinking_kwarg == "false") { + inputs.enable_thinking = false; + } + else if (!enable_thinking_kwarg.empty() && enable_thinking_kwarg[0] == '"') { + throw std::runtime_error("invalid type for \"enable_thinking\" (expected boolean, got string)"); + } + /*"whether to prefill the assistant's response if the last message is an assistant message (default: prefill enabled)\n" "when this flag is set, if the last message is an assistant message then it will be treated as a full message and not prefilled\n"*/ bool prefill_assistant_message = !inputs.messages.empty() && inputs.messages.back().role == "assistant" &&opt.prefill_assistant; @@ -572,7 +584,7 @@ static json oaicompat_chat_params_parse( /* TODO: test this properly */ inputs.reasoning_format = COMMON_REASONING_FORMAT_NONE; - if ((!inputs.enable_thinking) || inputs.chat_template_kwargs.find("enable_thinking") != inputs.chat_template_kwargs.end()) { + if (inputs.enable_thinking) { throw std::runtime_error("Assistant response prefill is incompatible with enable_thinking."); } inputs.add_generation_prompt = true; From 5e4b9a54c8da3f29c2921160baff6bfdb39f7758 Mon Sep 17 00:00:00 2001 From: Jesse Date: Mon, 8 Sep 2025 10:59:48 -0400 Subject: [PATCH 3/4] chat : Deepseek V3.1 reasoning and tool calling support (OpenAI Style) (#15533) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add DeepSeek V3.1 thinking mode support - Added COMMON_CHAT_FORMAT_DEEPSEEK_V3_1 enum value - Created common_chat_params_init_deepseek_v3_1() function (currently uses R1 implementation) - Created common_chat_parse_deepseek_v3_1() function that handles V3.1 thinking format: - Extracts reasoning content before '' tag into reasoning_content - Extracts regular content after '' tag into content - No opening '' tag in V3.1 format - Added detection logic for V3.1 templates based on pattern: 'message['prefix'] is defined and message['prefix'] and thinking' - Added V3.1 case to parsing switch statement This addresses the issue where V3.1 outputs reasoning content followed by '' and then regular content without the opening '' tag. * Another attempt by V3.1 non-thinking * Fix test, but it's not asserting anything. * Ignore vim swap files in tests dir * Update the test * Try using try_find_literal instead of regex * passing test * Revert "Try using try_find_literal instead of regex" This reverts commit c50d887ec2780dd9e6b8b397e92347d3db8d5575. * Remove unnecessary change * Remove comment * Add code to handle non-thinking mode. * Try to set message['prefix'] when thinking is enabled. * This fixes reasoning, but breaks normal content. We need state in the chat parser. * DeepSeek V3.1 thinking is now the default. Disable with `--reasoning-budget 0`. * Simplify (DeepSeek V3.1 reasoning) * Fix sign inversion bug * Add some tool calling code (not working). * Tool calls working in non-reasoning mode. * Attempt a unit test for tool call parsing. * Passing test * Add tests for both happy path and broken fenced DeepSeek V3.1 tool call variants. * Passing DeepSeek V3.1 tool call tests, but model is not working. * Revert assistance response prefill change. Not my monkeys. * Add fenced_thinking unit test variant. Passes, but thinking tool calling still isn't working for some reason. * Tests pass in reasoning mode. Also e2e tool test passes. * Make a copy of the parse_json_tool_calls function for deepseek-v3.1 so as to not accidentally introduce regressions. * Fix thinking_forced_open logic. tool calling broken. Need to add another test case. * That's what I get for cargo culting a newline. * Add multi tool call test for deepseek v3.1 non-reasoning * Move test, remove .gitignore change * Place deepseek-v3.1 reasoning test directly into existing reasoning function per CISC's request. * Address whitespace CI failure. * Merge two assert_equals per CISC's request. * Add DeepSeek-V3.1 tests to tests/test-chat.cpp per CISC's request. * Merge deepseek V3.1 and regular parse_json_tool_calls() function behaviors by adding optional update_cursor argument. * Update tests/test-chat-parser.cpp Co-authored-by: Sigbjørn Skjæret * Update tests/test-chat-parser.cpp Co-authored-by: Sigbjørn Skjæret * Update tests/test-chat-parser.cpp Co-authored-by: Sigbjørn Skjæret * Update tests/test-chat-parser.cpp Co-authored-by: Sigbjørn Skjæret * Update tests/test-chat-parser.cpp Co-authored-by: Sigbjørn Skjæret * Update tests/test-chat-parser.cpp Co-authored-by: Sigbjørn Skjæret * Update tests/test-chat-parser.cpp Co-authored-by: Sigbjørn Skjæret * Update tests/test-chat-parser.cpp Co-authored-by: Sigbjørn Skjæret * Update tests/test-chat-parser.cpp Co-authored-by: Sigbjørn Skjæret * DeepSeek V3.1 fix reasoning_format none * Strip grammar down to strictly what we expect based on model card. Throw out parts we cargo culted from R1 that don't make sense. * Update tests/test-chat-parser.cpp Co-authored-by: Sigbjørn Skjæret * DeepSeek V3.1 - Add edge case where thinking is forced open, there is tool calling in the reasoning content, but then the model just stops the output without closing the tag, so it's not a partial. In this case, use the tool call in the reasoning content. * DeepSeek V3.1 - simplify update_cursor * Update common/chat.cpp Co-authored-by: Sigbjørn Skjæret * Update common/chat.cpp Co-authored-by: Sigbjørn Skjæret * Update common/chat.cpp Co-authored-by: Sigbjørn Skjæret * Fix indent --------- Co-authored-by: openhands Co-authored-by: Sigbjørn Skjæret --- common/chat.cpp | 139 +++++++++++++ common/chat.h | 1 + models/templates/README.md | 1 + .../templates/deepseek-ai-DeepSeek-V3.1.jinja | 3 + tests/test-chat-parser.cpp | 193 +++++++++++++++++- 5 files changed, 336 insertions(+), 1 deletion(-) create mode 100644 models/templates/deepseek-ai-DeepSeek-V3.1.jinja diff --git a/common/chat.cpp b/common/chat.cpp index f4817a55c..3358ac05d 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -632,6 +632,7 @@ const char * common_chat_format_name(common_chat_format format) { case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2"; case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2"; case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1"; + case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1: return "DeepSeek V3.1"; case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro"; case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B"; case COMMON_CHAT_FORMAT_GRANITE: return "Granite"; @@ -700,11 +701,13 @@ static void parse_json_tool_calls( size_t from = std::string::npos; auto first = true; while (true) { + auto start_pos = builder.pos(); auto res = function_regex_start_only && first ? builder.try_consume_regex(*function_regex_start_only) : function_regex ? builder.try_find_regex(*function_regex, from) : std::nullopt; + if (res) { std::string name; if (get_function_name) { @@ -739,6 +742,8 @@ static void parse_json_tool_calls( return; } throw common_chat_msg_partial_exception("incomplete tool call"); + } else { + builder.move_to(start_pos); } break; } @@ -1329,6 +1334,71 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_ } return data; } + +static common_chat_params common_chat_params_init_deepseek_v3_1(const common_chat_template & tmpl, const struct templates_params & inputs) { + common_chat_params data; + + // Pass thinking context for DeepSeek V3.1 template + json additional_context = { + {"thinking", inputs.enable_thinking}, + }; + + auto prompt = apply(tmpl, inputs, + /* messages_override= */ inputs.messages, + /* tools_override= */ std::nullopt, + additional_context); + data.prompt = prompt; + data.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1; + if (string_ends_with(data.prompt, "")) { + if (!inputs.enable_thinking) { + data.prompt += ""; + } else { + data.thinking_forced_open = true; + } + } + if (inputs.tools.is_array() && !inputs.tools.empty()) { + data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null(); + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + std::vector tool_rules; + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool.at("function"); + std::string name = function.at("name"); + auto parameters = function.at("parameters"); + builder.resolve_refs(parameters); + tool_rules.push_back(builder.add_rule(name + "-call", + "( \"<|tool▁call▁begin|>\" )? \"" + name + "<|tool▁sep|>" + "\" " + builder.add_schema(name + "-args", parameters) + " " + "\"<|tool▁call▁end|>\"")); + }); + // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag, + // so we accept common variants (then it's all constrained) + builder.add_rule("root", + std::string(data.thinking_forced_open ? "( \"\" space )? " : "") + + "( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" | \"<|tool\\\\_calls\\\\_begin|>\" | \"<|tool▁calls|>\" ) " + "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " " + "\"<|tool▁calls▁end|>\"" + " space"); + data.grammar_triggers.push_back({ + COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, + // If thinking_forced_open, then we capture the tag in the grammar, + // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar) + std::string(data.thinking_forced_open ? "[\\s\\S]*?(\\s*)" : "(?:[\\s\\S]*?\\s*)?") + + "(<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)[\\s\\S]*" + }); + data.preserved_tokens = { + "", + "", + "<|tool▁calls▁begin|>", + "<|tool▁call▁begin|>", + "<|tool▁sep|>", + "<|tool▁call▁end|>", + "<|tool▁calls▁end|>", + }; + }); + } + return data; +} + static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) { builder.try_parse_reasoning("", ""); if (!builder.syntax().parse_tool_calls) { @@ -1350,6 +1420,66 @@ static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) { tool_calls_end); } +static void common_chat_parse_deepseek_v3_1_content(common_chat_msg_parser & builder) { + static const common_regex function_regex("(?:<|tool▁call▁begin|>)?([^\\n<]+)(?:<|tool▁sep|>)"); + + static const common_regex close_regex("(?:[\\s]*)?<|tool▁call▁end|>"); + static const common_regex tool_calls_begin("(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)"); + static const common_regex tool_calls_end("<|tool▁calls▁end|>"); + + if (!builder.syntax().parse_tool_calls) { + LOG("%s: not parse_tool_calls\n", __func__); + builder.add_content(builder.consume_rest()); + return; + } + + LOG("%s: parse_tool_calls\n", __func__); + + parse_json_tool_calls( + builder, + /* block_open= */ tool_calls_begin, + /* function_regex_start_only= */ std::nullopt, + function_regex, + close_regex, + tool_calls_end); +} + +static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) { + // DeepSeek V3.1 outputs reasoning content between "" and "" tags, followed by regular content + // First try to parse using the standard reasoning parsing method + LOG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str()); + + auto start_pos = builder.pos(); + auto found_end_think = builder.try_find_literal(""); + builder.move_to(start_pos); + + if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) { + LOG("%s: no end_think, not partial, adding content\n", __func__); + common_chat_parse_deepseek_v3_1_content(builder); + } else if (builder.try_parse_reasoning("", "")) { + // If reasoning was parsed successfully, the remaining content is regular content + LOG("%s: parsed reasoning, adding content\n", __func__); + // <|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>NAME\n```json\nJSON\n```<|tool▁call▁end|><|tool▁calls▁end|> + common_chat_parse_deepseek_v3_1_content(builder); + } else { + if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) { + LOG("%s: reasoning_format none, adding content\n", __func__); + common_chat_parse_deepseek_v3_1_content(builder); + return; + } + // If no reasoning tags found, check if we should treat everything as reasoning + if (builder.syntax().thinking_forced_open) { + // If thinking is forced open but no tags found, treat everything as reasoning + LOG("%s: thinking_forced_open, adding reasoning content\n", __func__); + builder.add_reasoning_content(builder.consume_rest()); + } else { + LOG("%s: no thinking_forced_open, adding content\n", __func__); + // <|tool▁call▁begin|>NAME<|tool▁sep|>JSON<|tool▁call▁end|> + common_chat_parse_deepseek_v3_1_content(builder); + } + } +} + static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) { common_chat_params data; auto prompt = apply(tmpl, inputs); @@ -2137,6 +2267,12 @@ static common_chat_params common_chat_templates_apply_jinja( } } + // DeepSeek V3.1: detect based on specific patterns in the template + if (src.find("message['prefix'] is defined and message['prefix'] and thinking") != std::string::npos && + params.json_schema.is_null()) { + return common_chat_params_init_deepseek_v3_1(tmpl, params); + } + // DeepSeek R1: use handler in all cases except json schema (thinking / tools). if (src.find("<|tool▁calls▁begin|>") != std::string::npos && params.json_schema.is_null()) { return common_chat_params_init_deepseek_r1(tmpl, params); @@ -2299,6 +2435,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) { case COMMON_CHAT_FORMAT_DEEPSEEK_R1: common_chat_parse_deepseek_r1(builder); break; + case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1: + common_chat_parse_deepseek_v3_1(builder); + break; case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: common_chat_parse_functionary_v3_2(builder); break; diff --git a/common/chat.h b/common/chat.h index 05fe75be1..55180e317 100644 --- a/common/chat.h +++ b/common/chat.h @@ -107,6 +107,7 @@ enum common_chat_format { COMMON_CHAT_FORMAT_FIREFUNCTION_V2, COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1, + COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, COMMON_CHAT_FORMAT_HERMES_2_PRO, COMMON_CHAT_FORMAT_COMMAND_R7B, COMMON_CHAT_FORMAT_GRANITE, diff --git a/models/templates/README.md b/models/templates/README.md index 2e8eaa595..3a649b8f4 100644 --- a/models/templates/README.md +++ b/models/templates/README.md @@ -22,4 +22,5 @@ These templates can be updated with the following commands: ./scripts/get_chat_template.py Qwen/QwQ-32B > models/templates/Qwen-QwQ-32B.jinja ./scripts/get_chat_template.py Qwen/Qwen3-0.6B > models/templates/Qwen-Qwen3-0.6B.jinja ./scripts/get_chat_template.py zai-org/GLM-4.5 > models/templates/zai-org-GLM-4.5.jinja +./scripts/get_chat_template.py deepseek-ai/DeepSeek-V3.1 > models/templates/deepseek-ai-DeepSeek-V3.1.jinja ``` diff --git a/models/templates/deepseek-ai-DeepSeek-V3.1.jinja b/models/templates/deepseek-ai-DeepSeek-V3.1.jinja new file mode 100644 index 000000000..e5656196a --- /dev/null +++ b/models/templates/deepseek-ai-DeepSeek-V3.1.jinja @@ -0,0 +1,3 @@ +{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% if not thinking is defined %}{% set thinking = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + ' + +' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- if ns.is_last_user %}{{'<|Assistant|>'}}{%- endif %}{%- set ns.is_last_user = false -%}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- else %}{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'<|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}{%- if ns.is_last_user %}{{'<|Assistant|>'}}{%- if message['prefix'] is defined and message['prefix'] and thinking %}{{''}} {%- else %}{{''}}{%- endif %}{%- endif %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{%- set content = message['content'] -%}{%- if '' in content %}{%- set content = content.split('', 1)[1] -%}{%- endif %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endfor -%}{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool %}{{'<|Assistant|>'}}{%- if not thinking %}{{''}}{%- else %}{{''}}{%- endif %}{% endif %} \ No newline at end of file diff --git a/tests/test-chat-parser.cpp b/tests/test-chat-parser.cpp index 2113a1284..aec38ba33 100644 --- a/tests/test-chat-parser.cpp +++ b/tests/test-chat-parser.cpp @@ -18,14 +18,20 @@ using json = nlohmann::ordered_json; template -static void assert_equals(const T & expected, const T & actual) { +static void assert_equals(const std::string_view label, const T & expected, const T & actual) { if (expected != actual) { + std::cerr << label << std::endl; std::cerr << "Expected: " << expected << std::endl; std::cerr << "Actual: " << actual << std::endl; std::cerr << std::flush; throw std::runtime_error("Test failed"); } } + +template +static void assert_equals(const T & expected, const T & actual) { + assert_equals("", expected, actual); +} static void assert_equals(const char * expected, const std::string & actual) { return assert_equals(expected, actual); } @@ -49,6 +55,7 @@ static void assert_throws(const std::function & fn, const std::string & } static void test_reasoning() { + //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG); { common_chat_msg_parser builder("CogitoErgo sum", /* is_partial= */ false, { /* .format = */ COMMON_CHAT_FORMAT_CONTENT_ONLY, @@ -102,6 +109,36 @@ static void test_reasoning() { assert_equals("Cogito", builder.result().content); assert_equals("Ergo sum", builder.consume_rest()); } + // Test DeepSeek V3.1 parsing - reasoning content followed by "" and then regular content + { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + }; + const std::string variant("deepseek_v3_1_reasoning_format_deepseek"); + common_chat_msg_parser builder("REASONINGok", /* is_partial= */ false, syntax); + assert_equals(variant, true, builder.try_parse_reasoning("", "")); + assert_equals(variant, std::string("REASONING"), builder.result().reasoning_content); + assert_equals(variant, std::string("ok"), builder.consume_rest()); + } + // Test DeepSeek V3.1 parsing - reasoning_format none - reasoning content followed by "" and then regular content + { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + }; + const std::string variant("deepseek_v3_1_reasoning_format_none"); + const std::string input = "REASONINGok"; + auto msg = common_chat_parse(input, false, syntax); + assert_equals(variant, std::string("REASONINGok"), msg.content); + assert_equals(variant, std::string(""), msg.reasoning_content); + } } static void test_regex() { @@ -189,6 +226,159 @@ static void test(const std::string & input, bool is_partial, const std::vectoris_partial); assert_equals(expected, args_paths.size() == 1 && args_paths[0].empty() ? js->value.get() : js->value.dump()); } + +static void test_deepseek_v3_1_tool_calls() { + //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG); + // variant: happy path for when it works as the model card says it should + const std::string variant("simple"); + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ false, + /* .parse_tool_calls = */ true, + }; + const std::string input = "<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>"; + auto msg = common_chat_parse(input, false, syntax); + assert_equals(variant, 1, msg.tool_calls.size()); + assert_equals(variant, std::string("get_time"), msg.tool_calls[0].name); + // JSON arguments are dumped without spaces + assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), msg.tool_calls[0].arguments); + assert_equals(variant, std::string(""), msg.content); + assert_equals(variant, std::string(""), msg.reasoning_content); + + // variant: simple + thinking open + { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + }; + const std::string variant("simple_thinking"); + const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>"; + auto m = common_chat_parse(in, false, syntax); + assert_equals(variant, 1, m.tool_calls.size()); + assert_equals(variant, std::string("get_time"), m.tool_calls[0].name); + assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments); + assert_equals(variant, std::string(""), m.content); + assert_equals(variant, std::string("REASONING"), m.reasoning_content); + } + // variant: simple + multiple tool calls + { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ false, + /* .parse_tool_calls = */ true, + }; + const std::string variant("simple_multiple_tool_calls"); + const std::string in = "CONTENT<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Paris\"}<|tool▁call▁end|><|tool▁call▁begin|>get_weather<|tool▁sep|>{\"city\": \"Paris\"}<|tool▁call▁end|><|tool▁calls▁end|>"; + auto m = common_chat_parse(in, false, syntax); + assert_equals(variant, 2, m.tool_calls.size()); + assert_equals(variant, std::string("get_time"), m.tool_calls[0].name); + assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[0].arguments); + assert_equals(variant, std::string("get_weather"), m.tool_calls[1].name); + assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[1].arguments); + assert_equals(variant, std::string("CONTENT"), m.content); + assert_equals(variant, std::string(""), m.reasoning_content); + } + + + // variant: thinking forced open + tool call in reasoning content + { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + }; + const std::string variant("thinking_forced_open_tool_call_in_reasoning"); + const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time2<|tool▁sep|>{\"city\": \"Tokyo2\"}<|tool▁call▁end|><|tool▁calls▁end|>REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>"; + auto m = common_chat_parse(in, false, syntax); + assert_equals(variant, 1, m.tool_calls.size()); + assert_equals(variant, std::string("get_time"), m.tool_calls[0].name); + assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments); + assert_equals(variant, std::string(""), m.content); + assert_equals(variant, std::string("REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time2<|tool▁sep|>{\"city\": \"Tokyo2\"}<|tool▁call▁end|><|tool▁calls▁end|>REASONING"), m.reasoning_content); + } + + // variant: thinking forced open + tool call in reasoning content + no closing think + not partial + // This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting + // to make tool calls in reasoning content according to the model card, but it does sometimes, so + // add the reasoning content as regular content and parse the tool calls. + { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + }; + const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_not_partial"); + const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>"; + auto m = common_chat_parse(in, false, syntax); + assert_equals(variant, std::string("REASONING"), m.content); + assert_equals(variant, std::string(""), m.reasoning_content); + assert_equals(variant, 1, m.tool_calls.size()); + assert_equals(variant, std::string("get_time"), m.tool_calls[0].name); + assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments); + } + + // variant: thinking forced open + tool call in reasoning content + no closing think + partial + { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + }; + const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_partial"); + const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>"; + auto m = common_chat_parse(in, /* is_partial= */ true, syntax); + assert_equals(variant, std::string("REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>"), m.reasoning_content); + assert_equals(variant, std::string(""), m.content); + assert_equals(variant, 0, m.tool_calls.size()); + } + + // variant: thinking not forced open + reasoning + regular content + no tool calls + { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + }; + const std::string variant("thinking_forced_open_reasoning_regular_content_no_tool_calls"); + const std::string in = "REASONINGCONTENT"; + auto m = common_chat_parse(in, false, syntax); + assert_equals(variant, 0, m.tool_calls.size()); + assert_equals(variant, std::string("CONTENT"), m.content); + assert_equals(variant, std::string("REASONING"), m.reasoning_content); + } + // variant: thinking not forced open + missing reasoning + no tool calls + { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ false, + /* .parse_tool_calls = */ true, + }; + const std::string variant("thinking_not_forced_open_missing_reasoning_no_tool_calls"); + const std::string in = "CONTENT"; + auto m = common_chat_parse(in, false, syntax); + assert_equals(variant, 0, m.tool_calls.size()); + assert_equals(variant, std::string("CONTENT"), m.content); + assert_equals(variant, std::string(""), m.reasoning_content); + } +} + static void test_with_args(const std::string & input, const std::string & expected, bool parse_as_partial = true, bool is_partial = true) { common_chat_msg_parser builder(input, parse_as_partial, {}); auto js = builder.try_consume_json_with_dumped_args({{"args"}}, {}); @@ -350,6 +540,7 @@ int main() { test_json_with_dumped_args(); test_reasoning(); test_regex(); + test_deepseek_v3_1_tool_calls(); std::cout << "All tests passed!\n"; return 0; } From c27edd95cb6997274cba67691a1c14e9ebdb0b83 Mon Sep 17 00:00:00 2001 From: firecoperana Date: Wed, 10 Sep 2025 19:25:14 -0500 Subject: [PATCH 4/4] Remove enable thinking log --- examples/server/server.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index aa9ab147c..06ba38ee9 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1360,7 +1360,7 @@ struct server_context { // 1. It's not explicitly disabled (reasoning_budget == 0) // 2. The chat template supports it const bool enable_thinking = params.reasoning_budget != 0 && common_chat_templates_support_enable_thinking(chat_templates.get()); - LLAMA_LOG_INFO("Enable thinking? %d\n", enable_thinking); + //LLAMA_LOG_INFO("Enable thinking? %d\n", enable_thinking); oai_parser_opt = { /* use_jinja */ params.use_jinja,