diff --git a/common/chat.cpp b/common/chat.cpp index 938872e82ee1d..256d10a68a45b 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -643,6 +643,7 @@ const char * common_chat_format_name(common_chat_format format) { case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2"; case COMMON_CHAT_FORMAT_APERTUS: return "Apertus"; case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: return "LFM2 with JSON tools"; + case COMMON_CHAT_FORMAT_KIMI_K2: return "Kimi K2"; default: throw std::runtime_error("Unknown chat format"); } @@ -698,7 +699,8 @@ static void parse_json_tool_calls( const common_regex & close_regex, const std::optional & block_close, bool allow_raw_python = false, - const std::function & get_function_name = nullptr) { + const std::function & get_function_name = nullptr, + const std::function & get_function_id = nullptr) { auto parse_tool_calls = [&]() { size_t from = std::string::npos; @@ -713,12 +715,18 @@ static void parse_json_tool_calls( if (res) { std::string name; + std::string id; if (get_function_name) { name = get_function_name(*res); } else { GGML_ASSERT(res->groups.size() == 2); name = builder.str(res->groups[1]); } + if (get_function_id) { + id = get_function_id(*res); + } else { + id = ""; + } first = false; if (name.empty()) { // get_function_name signalled us that we should skip this match and treat it as content. 
@@ -730,7 +738,7 @@ static void parse_json_tool_calls( auto maybe_raw_python = name == "python" && allow_raw_python; if (builder.input()[builder.pos()] == '{' || !maybe_raw_python) { if (auto arguments = builder.try_consume_json_with_dumped_args({{}})) { - if (!builder.add_tool_call(name, "", arguments->value) || arguments->is_partial) { + if (!builder.add_tool_call(name, id, arguments->value) || arguments->is_partial) { throw common_chat_msg_partial_exception("incomplete tool call"); } builder.consume_regex(close_regex); @@ -739,7 +747,7 @@ static void parse_json_tool_calls( } if (maybe_raw_python) { auto arguments = wrap_code_as_arguments(builder, builder.consume_rest()); - if (!builder.add_tool_call(name, "", arguments)) { + if (!builder.add_tool_call(name, id, arguments)) { throw common_chat_msg_partial_exception("incomplete tool call"); } return; @@ -1726,6 +1734,69 @@ static common_chat_params common_chat_params_init_deepseek_v3_1(const common_cha return data; } +static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template & tmpl, const struct templates_params & inputs) { + common_chat_params data; + + // Pass thinking context for Kimi K2 template + json additional_context = { + {"thinking", inputs.enable_thinking}, + }; + + auto prompt = apply(tmpl, inputs, + /* messages_override= */ inputs.messages, + /* tools_override= */ std::nullopt, + additional_context); + data.prompt = prompt; + data.format = COMMON_CHAT_FORMAT_KIMI_K2; + if (string_ends_with(data.prompt, "")) { + if (!inputs.enable_thinking) { + data.prompt += ""; + } else { + data.thinking_forced_open = true; + } + } + if (inputs.tools.is_array() && !inputs.tools.empty()) { + data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null(); + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + // https://github.com/MoonshotAI/Kimi-K2/blob/main/docs/tool_call_guidance.md + std::vector tool_rules; + 
foreach_function(inputs.tools, [&](const json & tool) { + const auto number = builder.add_rule("number", "[0-9]+"); + const auto & function = tool.at("function"); + std::string name = function.at("name"); + auto parameters = function.at("parameters"); + builder.resolve_refs(parameters); + tool_rules.push_back(builder.add_rule(name + "-call", + "\"<|tool_call_begin|>functions." + name + ":\" " + number + " \"<|tool_call_argument_begin|>" + "\" " + builder.add_schema(name + "-args", parameters) + " " + "\"<|tool_call_end|>\"")); + }); + builder.add_rule("root", + std::string(data.thinking_forced_open ? "( \"\" space )? " : "") + + "( \"<|tool_calls_section_begin|>\" ) " + "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " " + "\"<|tool_calls_section_end|>\""); + data.grammar_triggers.push_back({ + COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, + // If thinking_forced_open, then we capture the tag in the grammar, + // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar) + std::string(data.thinking_forced_open ? 
"[\\s\\S]*?(\\s*)" : "(?:[\\s\\S]*?\\s*)?") + + "(<\\|tool_calls_section_begin\\|>)[\\s\\S]*" + }); + data.preserved_tokens = { + "", + "", + "<|tool_calls_section_begin|>", + "<|tool_call_begin|>", + "<|tool_call_argument_begin|>", + "<|tool_call_end|>", + "<|tool_calls_section_end|>", + }; + }); + } + return data; +} + static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) { builder.try_parse_reasoning("", ""); if (!builder.syntax().parse_tool_calls) { @@ -1807,6 +1878,91 @@ static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) { } } +static void common_chat_parse_kimi_k2_content(common_chat_msg_parser & builder) { + static const common_regex function_regex("(?:<\\|tool_call_begin\\|>)([\\w\\.]+:\\d+)\\s*(?:<\\|tool_call_argument_begin\\|>)"); + + static const common_regex close_regex("(?:[\\s]*)?<\\|tool_call_end\\|>"); + static const common_regex tool_calls_begin("(?:<\\|tool_calls_section_begin\\|>)"); + static const common_regex tool_calls_end("<\\|tool_calls_section_end\\|>"); + + if (!builder.syntax().parse_tool_calls) { + LOG_DBG("%s: not parse_tool_calls\n", __func__); + builder.add_content(builder.consume_rest()); + return; + } + + LOG_DBG("%s: parse_tool_calls\n", __func__); + + parse_json_tool_calls( + builder, + /* block_open= */ tool_calls_begin, + /* function_regex_start_only= */ std::nullopt, + function_regex, + close_regex, + tool_calls_end, + /* allow_raw_python */ false, + /* get_function_name= */ [&](const auto & res) -> std::string { + auto function_id = builder.str(res.groups[1]); + + auto dot_pos = function_id.find("."); + if (dot_pos == std::string::npos) { + return ""; + } + + auto colon_pos = function_id.find(':', dot_pos + 1); + if (colon_pos == std::string::npos) + return function_id.substr(dot_pos + 1); + else + return function_id.substr(dot_pos + 1, colon_pos - (dot_pos + 1)); + }, + /* get_function_id= */ [&](const auto & res) -> std::string { + auto function_id = 
builder.str(res.groups[1]); + + auto dot_pos = function_id.find("."); + if (dot_pos == std::string::npos) { + return ""; + } + return function_id; + } + ); +} + +static void common_chat_parse_kimi_k2(common_chat_msg_parser & builder) { + // Kimi K2 outputs reasoning content between "" and "" tags, followed by regular content + // First try to parse using the standard reasoning parsing method + LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str()); + + auto start_pos = builder.pos(); + auto found_end_think = builder.try_find_literal(""); + builder.move_to(start_pos); + + if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) { + LOG_DBG("%s: no end_think, not partial, adding content\n", __func__); + common_chat_parse_kimi_k2_content(builder); + } else if (builder.try_parse_reasoning("", "")) { + // If reasoning was parsed successfully, the remaining content is regular content + LOG_DBG("%s: parsed reasoning, adding content\n", __func__); + // <|tool_calls_section_begin|><|tool_call_begin|>functions.NAME:INDEX<|tool_call_argument_begin|>JSON<|tool_call_end|><|tool_calls_section_end|> + common_chat_parse_kimi_k2_content(builder); + } else { + if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) { + LOG_DBG("%s: reasoning_format none, adding content\n", __func__); + common_chat_parse_kimi_k2_content(builder); + return; + } + // If no reasoning tags found, check if we should treat everything as reasoning + if (builder.syntax().thinking_forced_open) { + // If thinking is forced open but no tags found, treat everything as reasoning + LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__); + builder.add_reasoning_content(builder.consume_rest()); + } else { + LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__); + // <|tool_call_begin|>functions.NAME:INDEX<|tool_call_argument_begin|>JSON<|tool_call_end|> + 
common_chat_parse_kimi_k2_content(builder); + } + } +} + static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) { common_chat_params data; @@ -2912,6 +3068,12 @@ static common_chat_params common_chat_templates_apply_jinja( return common_chat_params_init_deepseek_v3_1(tmpl, params); } + // Kimi K2: detect based on specific patterns in the template + if (src.find("<|tool_calls_section_begin|>") != std::string::npos && + params.json_schema.is_null()) { + return common_chat_params_init_kimi_k2(tmpl, params); + } + // DeepSeek R1: use handler in all cases except json schema (thinking / tools). if (src.find("<|tool▁calls▁begin|>") != std::string::npos && params.json_schema.is_null()) { return common_chat_params_init_deepseek_r1(tmpl, params); @@ -3139,6 +3301,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) { case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: common_chat_parse_lfm2(builder); break; + case COMMON_CHAT_FORMAT_KIMI_K2: + common_chat_parse_kimi_k2(builder); + break; default: throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format)); } diff --git a/common/chat.h b/common/chat.h index 50efb0d4e516f..74385f2058c1d 100644 --- a/common/chat.h +++ b/common/chat.h @@ -117,6 +117,7 @@ enum common_chat_format { COMMON_CHAT_FORMAT_NEMOTRON_V2, COMMON_CHAT_FORMAT_APERTUS, COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS, + COMMON_CHAT_FORMAT_KIMI_K2, COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats }; diff --git a/models/templates/moonshotai-Kimi-K2-Thinking.jinja b/models/templates/moonshotai-Kimi-K2-Thinking.jinja new file mode 100644 index 0000000000000..26f99b7d19f74 --- /dev/null +++ b/models/templates/moonshotai-Kimi-K2-Thinking.jinja @@ -0,0 +1,97 @@ +{%- macro render_content(msg) -%} + {%- set c = msg.get('content') -%} + {%- if c is string -%} + {{ c }} + {%- elif c is not none -%} + {% for content in c -%} + 
{% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%} + <|media_start|>image<|media_content|><|media_pad|><|media_end|> + {% else -%} + {{ content['text'] }} + {%- endif -%} + {%- endfor -%} + {%- endif -%} +{%- endmacro -%} + +{% macro set_roles(message) -%} + {%- set role_name = message.get('name') or message['role'] -%} + {%- if message['role'] == 'user' -%} + <|im_user|>{{role_name}}<|im_middle|> + {%- elif message['role'] == 'assistant' -%} + <|im_assistant|>{{role_name}}<|im_middle|> + {%- else -%} + <|im_system|>{{role_name}}<|im_middle|> + {%- endif -%} +{%- endmacro -%} + + +{%- macro render_toolcalls(message) -%} + <|tool_calls_section_begin|> + {%- for tool_call in message['tool_calls'] -%} + {%- set formatted_id = tool_call['id'] -%} + <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{% if tool_call['function']['arguments'] is string %}{{ tool_call['function']['arguments'] }}{% else %}{{ tool_call['function']['arguments'] | tojson }}{% endif %}<|tool_call_end|> + {%- endfor -%} + <|tool_calls_section_end|> +{%- endmacro -%} + + +{# Find last non-tool-call assistant message #} +{%- set ns = namespace(last_non_tool_call_assistant_msg=-1) -%} +{%- for idx in range(messages|length-1, -1, -1) -%} + {%- if messages[idx]['role'] == 'assistant' and not messages[idx].get('tool_calls') -%} + {%- set ns.last_non_tool_call_assistant_msg = idx -%} + {%- break -%} + {%- endif -%} +{%- endfor -%} + +{# split all messages into history & suffix, reasoning_content in suffix should be preserved.#} +{%- set hist_msgs = messages[:ns.last_non_tool_call_assistant_msg+1] -%} +{%- set suffix_msgs = messages[ns.last_non_tool_call_assistant_msg+1:] -%} + +{%- if tools -%} + <|im_system|>tool_declare<|im_middle|>{{ tools | tojson() }}<|im_end|> +{%- endif -%} + +{%- if messages|length == 0 or messages[0]['role'] != 'system' -%} + <|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|> +{%- endif 
-%} + +{%- for message in hist_msgs -%} + {{set_roles(message)}} + {%- if message['role'] == 'assistant' -%} + {{render_content(message)}} + {%- if message.get('tool_calls') -%} + {{render_toolcalls(message)}} + {%- endif -%} + {%- elif message['role'] == 'tool' -%} + {%- set tool_call_id = message.tool_call_id -%} + ## Return of {{ tool_call_id }} +{{render_content(message)}} + {%- elif message['content'] is not none -%} + {{render_content(message)}} + {%- endif -%} + <|im_end|> +{%- endfor -%} + +{%- for message in suffix_msgs -%} + {{set_roles(message)}} + {%- if message['role'] == 'assistant' -%} + {%- set rc = message.get('reasoning_content', '') -%} + {{rc}}{{render_content(message)}} + {%- if message.get('tool_calls') -%} + {{render_toolcalls(message)}} + {%- endif -%} + {%- elif message['role'] == 'tool' -%} + {%- set tool_call_id = message.tool_call_id -%} + ## Return of {{ tool_call_id }} +{{render_content(message)}} + {%- elif message['content'] is not none -%} + {{render_content(message)}} + {%- endif -%} + <|im_end|> +{%- endfor -%} + + +{%- if add_generation_prompt -%} + <|im_assistant|>assistant<|im_middle|> +{%- endif -%} diff --git a/tests/test-chat-parser.cpp b/tests/test-chat-parser.cpp index 4766518fe6955..dd1ecccedb78f 100644 --- a/tests/test-chat-parser.cpp +++ b/tests/test-chat-parser.cpp @@ -164,6 +164,36 @@ static void test_reasoning() { assert_equals(variant, std::string("REASONINGok"), msg.content); assert_equals(variant, std::string(""), msg.reasoning_content); } + // Test Kimi K2 parsing - reasoning content followed by "" and then regular content + { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + }; + const std::string variant("kimi_k2_reasoning_format_deepseek"); + common_chat_msg_parser builder("REASONINGok", /* is_partial= */ 
false, syntax); + assert_equals(variant, true, builder.try_parse_reasoning("", "")); + assert_equals(variant, std::string("REASONING"), builder.result().reasoning_content); + assert_equals(variant, std::string("ok"), builder.consume_rest()); + } + // Test Kimi K2 parsing - reasoning_format none - reasoning content followed by "" and then regular content + { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + }; + const std::string variant("kimi_k2_reasoning_format_none"); + const std::string input = "REASONINGok"; + auto msg = common_chat_parse(input, false, syntax); + assert_equals(variant, std::string("REASONINGok"), msg.content); + assert_equals(variant, std::string(""), msg.reasoning_content); + } } static void test_regex() { @@ -404,6 +434,158 @@ static void test_deepseek_v3_1_tool_calls() { } } +static void test_kimi_k2_tool_calls() { + //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG); + // variant: happy path for when it works as the model card says it should + const std::string variant("simple"); + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ false, + /* .parse_tool_calls = */ true, + }; + const std::string input = "<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>"; + auto msg = common_chat_parse(input, false, syntax); + assert_equals(variant, 1, msg.tool_calls.size()); + assert_equals(variant, std::string("get_time"), msg.tool_calls[0].name); + // JSON arguments are dumped without spaces + assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), msg.tool_calls[0].arguments); + 
assert_equals(variant, std::string(""), msg.content); + assert_equals(variant, std::string(""), msg.reasoning_content); + + // variant: simple + thinking open + { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + }; + const std::string variant("simple_thinking"); + const std::string in = "REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>"; + auto m = common_chat_parse(in, false, syntax); + assert_equals(variant, 1, m.tool_calls.size()); + assert_equals(variant, std::string("get_time"), m.tool_calls[0].name); + assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments); + assert_equals(variant, std::string(""), m.content); + assert_equals(variant, std::string("REASONING"), m.reasoning_content); + } + // variant: simple + multiple tool calls + { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ false, + /* .parse_tool_calls = */ true, + }; + const std::string variant("simple_multiple_tool_calls"); + const std::string in = "CONTENT<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Paris\"}<|tool_call_end|><|tool_call_begin|>get_weather<|tool_call_argument_begin|>{\"city\": \"Paris\"}<|tool_call_end|><|tool_calls_section_end|>"; + auto m = common_chat_parse(in, false, syntax); + assert_equals(variant, 2, m.tool_calls.size()); + assert_equals(variant, std::string("get_time"), m.tool_calls[0].name); + assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[0].arguments); + assert_equals(variant, 
std::string("get_weather"), m.tool_calls[1].name); + assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[1].arguments); + assert_equals(variant, std::string("CONTENT"), m.content); + assert_equals(variant, std::string(""), m.reasoning_content); + } + + + // variant: thinking forced open + tool call in reasoning content + { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + }; + const std::string variant("thinking_forced_open_tool_call_in_reasoning"); + const std::string in = "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time2<|tool_call_argument_begin|>{\"city\": \"Tokyo2\"}<|tool_call_end|><|tool_calls_section_end|>REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>"; + auto m = common_chat_parse(in, false, syntax); + assert_equals(variant, 1, m.tool_calls.size()); + assert_equals(variant, std::string("get_time"), m.tool_calls[0].name); + assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments); + assert_equals(variant, std::string(""), m.content); + assert_equals(variant, std::string("REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time2<|tool▁sep|>{\"city\": \"Tokyo2\"}<|tool▁call▁end|><|tool▁calls▁end|>REASONING"), m.reasoning_content); + } + + // variant: thinking forced open + tool call in reasoning content + no closing think + not partial + // This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting + // to make tool calls in reasoning content according to the model card, but it does sometimes, so + // add the reasoning content as regular content and parse the tool calls. 
+ { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + }; + const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_not_partial"); + const std::string in = "REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>"; + auto m = common_chat_parse(in, false, syntax); + assert_equals(variant, std::string("REASONING"), m.content); + assert_equals(variant, std::string(""), m.reasoning_content); + assert_equals(variant, 1, m.tool_calls.size()); + assert_equals(variant, std::string("get_time"), m.tool_calls[0].name); + assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments); + } + + // variant: thinking forced open + tool call in reasoning content + no closing think + partial + { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + }; + const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_partial"); + const std::string in = "REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>"; + auto m = common_chat_parse(in, /* is_partial= */ true, syntax); + assert_equals(variant, std::string("REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>"), m.reasoning_content); + assert_equals(variant, std::string(""), m.content); + assert_equals(variant, 0, 
m.tool_calls.size()); + } + + // variant: thinking forced open + reasoning + regular content + no tool calls + { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + }; + const std::string variant("thinking_forced_open_reasoning_regular_content_no_tool_calls"); + const std::string in = "REASONINGCONTENT"; + auto m = common_chat_parse(in, false, syntax); + assert_equals(variant, 0, m.tool_calls.size()); + assert_equals(variant, std::string("CONTENT"), m.content); + assert_equals(variant, std::string("REASONING"), m.reasoning_content); + } + // variant: thinking not forced open + missing reasoning + no tool calls + { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ false, + /* .parse_tool_calls = */ true, + }; + const std::string variant("thinking_not_forced_open_missing_reasoning_no_tool_calls"); + const std::string in = "CONTENT"; + auto m = common_chat_parse(in, false, syntax); + assert_equals(variant, 0, m.tool_calls.size()); + assert_equals(variant, std::string("CONTENT"), m.content); + assert_equals(variant, std::string(""), m.reasoning_content); + } +} + +static void test_with_args(const std::string & input, const std::string & expected, bool parse_as_partial = true, bool is_partial = true) { common_chat_msg_parser builder(input, parse_as_partial, {}); auto js = builder.try_consume_json_with_dumped_args({{"args"}}, {}); diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 4a8ba849b3f8c..b436113f103c5 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -2067,6 +2067,142 @@ static void test_template_output_parsers() { /* .parse_tool_calls = */ true, })); } + { + auto tmpls = 
read_templates("models/templates/moonshotai-Kimi-K2-Thinking.jinja"); + std::vector end_tokens{ "<|im_end|>" }; + + for (const auto & inputs : { inputs_no_tools, inputs_tools }) { + auto params = common_chat_templates_apply(tmpls.get(), inputs); + assert_equals(COMMON_CHAT_FORMAT_KIMI_K2, params.format); + assert_equals(false, params.thinking_forced_open); + } + + test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); + test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); + assert_msg_equals( + simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"), + common_chat_parse( + "I'm\nthinkingHello, world!\nWhat's up?", + /* is_partial= */ false, + { + COMMON_CHAT_FORMAT_KIMI_K2, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + })); + // variant: thinking forced open, reasoning_format none + assert_msg_equals( + simple_assist_msg("REASONINGok", ""), + common_chat_parse( + "REASONINGok", + /* is_partial= */ false, + { + COMMON_CHAT_FORMAT_KIMI_K2, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + })); + // variant: happy path for when it works as the model card says it should + assert_msg_equals( + simple_assist_msg("", "", "get_time", "{\"city\":\"Tokyo\"}", "functions.get_time:0"), + common_chat_parse( + "<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>", + /* is_partial= */ false, + { + COMMON_CHAT_FORMAT_KIMI_K2, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ false, + /* .parse_tool_calls = */ 
true, + })); + // variant: simple + thinking open + assert_msg_equals( + simple_assist_msg("", "REASONING", "get_time", "{\"city\":\"Tokyo\"}", "functions.get_time:0"), + common_chat_parse( + "REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>", + /* is_partial= */ false, + { + COMMON_CHAT_FORMAT_KIMI_K2, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + })); + // variant: simple + multiple tool calls + common_chat_msg message_assist_multiple_calls; + message_assist_multiple_calls.role = "assistant"; + message_assist_multiple_calls.content = "CONTENT"; + message_assist_multiple_calls.tool_calls.push_back({"get_time", "{\"city\":\"Paris\"}", "functions.get_time:0"}); + message_assist_multiple_calls.tool_calls.push_back({"get_weather", "{\"city\":\"Paris\"}", "functions.get_weather:1"}); + assert_msg_equals( + message_assist_multiple_calls, + common_chat_parse( + "CONTENT<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Paris\"}<|tool_call_end|><|tool_call_begin|>functions.get_weather:1<|tool_call_argument_begin|>{\"city\": \"Paris\"}<|tool_call_end|><|tool_calls_section_end|>", + /* is_partial= */ false, + { + COMMON_CHAT_FORMAT_KIMI_K2, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ false, + /* .parse_tool_calls = */ true, + })); + // variant: thinking forced open + tool call in reasoning content + assert_msg_equals( + simple_assist_msg("", "REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time2:0<|tool_call_argument_begin|>{\"city\": \"Tokyo2\"}<|tool_call_end|><|tool_calls_section_end|>REASONING", "get_time", "{\"city\":\"Tokyo\"}", "functions.get_time:1"), + 
common_chat_parse( + "REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time2:0<|tool_call_argument_begin|>{\"city\": \"Tokyo2\"}<|tool_call_end|><|tool_calls_section_end|>REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:1<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>", + /* is_partial= */ false, + { + COMMON_CHAT_FORMAT_KIMI_K2, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + })); + // variant: thinking forced open + tool call in reasoning content + no closing think + not partial + // This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting + // to make tool calls in reasoning content according to the model card, but it does sometimes, so + // add the reasoning content as regular content and parse the tool calls. + assert_msg_equals( + simple_assist_msg("REASONING", "", "get_time", "{\"city\":\"Tokyo\"}", "functions.get_time:0"), + common_chat_parse( + "REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>", + /* is_partial= */ false, + { + COMMON_CHAT_FORMAT_KIMI_K2, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + })); + // variant: thinking forced open + tool call in reasoning content + no closing think + partial + assert_msg_equals( + simple_assist_msg("", "REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>", "", ""), + common_chat_parse( + "REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": 
\"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>", + /* is_partial= */ true, + { + COMMON_CHAT_FORMAT_KIMI_K2, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + })); + // variant: thinking not forced open + missing reasoning + no tool calls + assert_msg_equals( + simple_assist_msg("CONTENT", ""), + common_chat_parse( + "CONTENT", + /* is_partial= */ false, + { + COMMON_CHAT_FORMAT_KIMI_K2, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ false, + /* .parse_tool_calls = */ true, + })); + } { auto tmpls = read_templates("models/templates/Apertus-8B-Instruct.jinja"); std::vector end_tokens{ "<|assistant_end|>" };