171 changes: 168 additions & 3 deletions common/chat.cpp
@@ -643,6 +643,7 @@ const char * common_chat_format_name(common_chat_format format) {
case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2";
case COMMON_CHAT_FORMAT_APERTUS: return "Apertus";
case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: return "LFM2 with JSON tools";
case COMMON_CHAT_FORMAT_KIMI_K2: return "Kimi K2";
default:
throw std::runtime_error("Unknown chat format");
}
@@ -698,7 +699,8 @@ static void parse_json_tool_calls(
const common_regex & close_regex,
const std::optional<common_regex> & block_close,
bool allow_raw_python = false,
const std::function<std::string(const common_chat_msg_parser::find_regex_result & fres)> & get_function_name = nullptr) {
const std::function<std::string(const common_chat_msg_parser::find_regex_result & fres)> & get_function_name = nullptr,
const std::function<std::string(const common_chat_msg_parser::find_regex_result & fres)> & get_function_id = nullptr) {

auto parse_tool_calls = [&]() {
size_t from = std::string::npos;
@@ -713,12 +715,18 @@

if (res) {
std::string name;
std::string id;
if (get_function_name) {
name = get_function_name(*res);
} else {
GGML_ASSERT(res->groups.size() == 2);
name = builder.str(res->groups[1]);
}
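// Optional hook for formats that encode a call id in the opening tag
// (e.g. Kimi K2's "functions.NAME:IDX"); formats without ids leave it empty.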
if (get_function_id) {
id = get_function_id(*res);
} else {
id = "";
}
first = false;
if (name.empty()) {
// get_function_name signalled us that we should skip this match and treat it as content.
@@ -730,7 +738,7 @@
auto maybe_raw_python = name == "python" && allow_raw_python;
if (builder.input()[builder.pos()] == '{' || !maybe_raw_python) {
if (auto arguments = builder.try_consume_json_with_dumped_args({{}})) {
if (!builder.add_tool_call(name, "", arguments->value) || arguments->is_partial) {
if (!builder.add_tool_call(name, id, arguments->value) || arguments->is_partial) {
throw common_chat_msg_partial_exception("incomplete tool call");
}
builder.consume_regex(close_regex);
@@ -739,7 +747,7 @@
}
if (maybe_raw_python) {
auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
if (!builder.add_tool_call(name, "", arguments)) {
if (!builder.add_tool_call(name, id, arguments)) {
throw common_chat_msg_partial_exception("incomplete tool call");
}
return;
@@ -1726,6 +1734,69 @@ static common_chat_params common_chat_params_init_deepseek_v3_1(const common_cha
return data;
}

static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template & tmpl, const struct templates_params & inputs) {
common_chat_params data;

// Pass thinking context for Kimi K2 template
json additional_context = {
{"thinking", inputs.enable_thinking},
};

auto prompt = apply(tmpl, inputs,
/* messages_override= */ inputs.messages,
/* tools_override= */ std::nullopt,
additional_context);
data.prompt = prompt;
data.format = COMMON_CHAT_FORMAT_KIMI_K2;
if (string_ends_with(data.prompt, "<think>")) {
if (!inputs.enable_thinking) {
data.prompt += "</think>";
} else {
data.thinking_forced_open = true;
}
}
if (inputs.tools.is_array() && !inputs.tools.empty()) {
data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
// https://github.com/MoonshotAI/Kimi-K2/blob/main/docs/tool_call_guidance.md
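// The rules below constrain sampled output to the Kimi K2 tool-call wire format,
// e.g. (function name and arguments here are illustrative, not from the docs):
//   <|tool_calls_section_begin|><|tool_call_begin|>functions.get_weather:0<|tool_call_argument_begin|>{"city": "Tokyo"}<|tool_call_end|><|tool_calls_section_end|>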
std::vector<std::string> tool_rules;
foreach_function(inputs.tools, [&](const json & tool) {
const auto number = builder.add_rule("number", "[0-9]+");
const auto & function = tool.at("function");
std::string name = function.at("name");
auto parameters = function.at("parameters");
builder.resolve_refs(parameters);
tool_rules.push_back(builder.add_rule(name + "-call",
"\"<|tool_call_begin|>functions." + name + ":\" " + number + " \"<|tool_call_argument_begin|>"
"\" " + builder.add_schema(name + "-args", parameters) + " "
"\"<|tool_call_end|>\""));
});
builder.add_rule("root",
std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
"( \"<|tool_calls_section_begin|>\" ) "
"(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
"\"<|tool_calls_section_end|>\"");
data.grammar_triggers.push_back({
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
// If thinking_forced_open, then we capture the </think> tag in the grammar,
// (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") +
"(<\\|tool_calls_section_begin\\|>)[\\s\\S]*"
});
data.preserved_tokens = {
"<think>",
"</think>",
"<|tool_calls_section_begin|>",
"<|tool_call_begin|>",
"<|tool_call_argument_begin|>",
"<|tool_call_end|>",
"<|tool_calls_section_end|>",
};
});
}
return data;
}

static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
builder.try_parse_reasoning("<think>", "</think>");
if (!builder.syntax().parse_tool_calls) {
Expand Down Expand Up @@ -1807,6 +1878,91 @@ static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
}
}

static void common_chat_parse_kimi_k2_content(common_chat_msg_parser & builder) {
static const common_regex function_regex("(?:<\\|tool_call_begin\\|>)([\\w\\.]+:\\d+)\\s*(?:<\\|tool_call_argument_begin\\|>)");
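// Group 1 captures the raw call id, e.g. "functions.get_weather:0" (function name illustrative).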

static const common_regex close_regex("\\s*<\\|tool_call_end\\|>");
static const common_regex tool_calls_begin("(?:<\\|tool_calls_section_begin\\|>)");
static const common_regex tool_calls_end("<\\|tool_calls_section_end\\|>");

if (!builder.syntax().parse_tool_calls) {
LOG_DBG("%s: not parse_tool_calls\n", __func__);
builder.add_content(builder.consume_rest());
return;
}

LOG_DBG("%s: parse_tool_calls\n", __func__);

parse_json_tool_calls(
builder,
/* block_open= */ tool_calls_begin,
/* function_regex_start_only= */ std::nullopt,
function_regex,
close_regex,
tool_calls_end,
/* allow_raw_python */ false,
/* get_function_name= */ [&](const auto & res) -> std::string {
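// The raw id has the form "functions.NAME:IDX"; strip everything up to the
// first '.' and the ":IDX" suffix to recover the bare function name.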
auto function_id = builder.str(res.groups[1]);

auto dot_pos = function_id.find(".");
if (dot_pos == std::string::npos) {
return "";
}

auto colon_pos = function_id.find(':', dot_pos + 1);
if (colon_pos == std::string::npos)
return function_id.substr(dot_pos + 1);
else
return function_id.substr(dot_pos + 1, colon_pos - (dot_pos + 1));
},
/* get_function_id= */ [&](const auto & res) -> std::string {
auto function_id = builder.str(res.groups[1]);

auto dot_pos = function_id.find(".");
if (dot_pos == std::string::npos) {
return "";
}
return function_id;
}
);
}

static void common_chat_parse_kimi_k2(common_chat_msg_parser & builder) {
// Kimi K2 outputs reasoning content between "<think>" and "</think>" tags, followed by regular content.
// First try to parse using the standard reasoning parsing method.
LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str());

auto start_pos = builder.pos();
auto found_end_think = builder.try_find_literal("</think>");
builder.move_to(start_pos);

if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) {
LOG_DBG("%s: no end_think, not partial, adding content\n", __func__);
common_chat_parse_kimi_k2_content(builder);
} else if (builder.try_parse_reasoning("<think>", "</think>")) {
// If reasoning was parsed successfully, the remaining content is regular content
LOG_DBG("%s: parsed reasoning, adding content\n", __func__);
// </think><|tool_calls_section_begin|><|tool_call_begin|>functions.NAME:IDX<|tool_call_argument_begin|>JSON<|tool_call_end|><|tool_calls_section_end|>
common_chat_parse_kimi_k2_content(builder);
} else {
if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
LOG_DBG("%s: reasoning_format none, adding content\n", __func__);
common_chat_parse_kimi_k2_content(builder);
return;
}
// If no reasoning tags found, check if we should treat everything as reasoning
if (builder.syntax().thinking_forced_open) {
// If thinking is forced open but no tags found, treat everything as reasoning
LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__);
builder.add_reasoning_content(builder.consume_rest());
} else {
LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__);
// <|tool_call_begin|>functions.NAME:IDX<|tool_call_argument_begin|>JSON<|tool_call_end|>
common_chat_parse_kimi_k2_content(builder);
}
}
}

static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) {
common_chat_params data;

@@ -2912,6 +3068,12 @@ static common_chat_params common_chat_templates_apply_jinja(
return common_chat_params_init_deepseek_v3_1(tmpl, params);
}

// Kimi K2: detect based on specific patterns in the template
if (src.find("<|tool_calls_section_begin|>") != std::string::npos &&
params.json_schema.is_null()) {
return common_chat_params_init_kimi_k2(tmpl, params);
}

// DeepSeek R1: use handler in all cases except json schema (thinking / tools).
if (src.find("<|tool▁calls▁begin|>") != std::string::npos && params.json_schema.is_null()) {
return common_chat_params_init_deepseek_r1(tmpl, params);
@@ -3139,6 +3301,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS:
common_chat_parse_lfm2(builder);
break;
case COMMON_CHAT_FORMAT_KIMI_K2:
common_chat_parse_kimi_k2(builder);
break;
default:
throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
}
1 change: 1 addition & 0 deletions common/chat.h
@@ -117,6 +117,7 @@ enum common_chat_format {
COMMON_CHAT_FORMAT_NEMOTRON_V2,
COMMON_CHAT_FORMAT_APERTUS,
COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS,
COMMON_CHAT_FORMAT_KIMI_K2,

COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
};
97 changes: 97 additions & 0 deletions models/templates/moonshotai-Kimi-K2-Thinking.jinja
@@ -0,0 +1,97 @@
{%- macro render_content(msg) -%}
{%- set c = msg.get('content') -%}
{%- if c is string -%}
{{ c }}
{%- elif c is not none -%}
{% for content in c -%}
{% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
<|media_start|>image<|media_content|><|media_pad|><|media_end|>
{% else -%}
{{ content['text'] }}
{%- endif -%}
{%- endfor -%}
{%- endif -%}
{%- endmacro -%}

{% macro set_roles(message) -%}
{%- set role_name = message.get('name') or message['role'] -%}
{%- if message['role'] == 'user' -%}
<|im_user|>{{role_name}}<|im_middle|>
{%- elif message['role'] == 'assistant' -%}
<|im_assistant|>{{role_name}}<|im_middle|>
{%- else -%}
<|im_system|>{{role_name}}<|im_middle|>
{%- endif -%}
{%- endmacro -%}


{%- macro render_toolcalls(message) -%}
<|tool_calls_section_begin|>
{%- for tool_call in message['tool_calls'] -%}
{%- set formatted_id = tool_call['id'] -%}
<|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{% if tool_call['function']['arguments'] is string %}{{ tool_call['function']['arguments'] }}{% else %}{{ tool_call['function']['arguments'] | tojson }}{% endif %}<|tool_call_end|>
{%- endfor -%}
<|tool_calls_section_end|>
{%- endmacro -%}
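{#- Example rendered output of this macro (id, name and arguments illustrative):
    <|tool_calls_section_begin|><|tool_call_begin|>functions.get_weather:0<|tool_call_argument_begin|>{"city": "Tokyo"}<|tool_call_end|><|tool_calls_section_end|> -#}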


{# Find last non-tool-call assistant message #}
{%- set ns = namespace(last_non_tool_call_assistant_msg=-1) -%}
{%- for idx in range(messages|length-1, -1, -1) -%}
{%- if messages[idx]['role'] == 'assistant' and not messages[idx].get('tool_calls') -%}
{%- set ns.last_non_tool_call_assistant_msg = idx -%}
{%- break -%}
{%- endif -%}
{%- endfor -%}

{# Split all messages into history & suffix; reasoning_content in suffix messages should be preserved. #}
{%- set hist_msgs = messages[:ns.last_non_tool_call_assistant_msg+1] -%}
{%- set suffix_msgs = messages[ns.last_non_tool_call_assistant_msg+1:] -%}

{%- if tools -%}
<|im_system|>tool_declare<|im_middle|>{{ tools | tojson() }}<|im_end|>
{%- endif -%}

{%- if messages|length == 0 or messages[0]['role'] != 'system' -%}
<|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|>
{%- endif -%}

{%- for message in hist_msgs -%}
{{set_roles(message)}}
{%- if message['role'] == 'assistant' -%}
<think></think>{{render_content(message)}}
{%- if message.get('tool_calls') -%}
{{render_toolcalls(message)}}
{%- endif -%}
{%- elif message['role'] == 'tool' -%}
{%- set tool_call_id = message.tool_call_id -%}
## Return of {{ tool_call_id }}
{{render_content(message)}}
{%- elif message['content'] is not none -%}
{{render_content(message)}}
{%- endif -%}
<|im_end|>
{%- endfor -%}

{%- for message in suffix_msgs -%}
{{set_roles(message)}}
{%- if message['role'] == 'assistant' -%}
{%- set rc = message.get('reasoning_content', '') -%}
<think>{{rc}}</think>{{render_content(message)}}
{%- if message.get('tool_calls') -%}
{{render_toolcalls(message)}}
{%- endif -%}
{%- elif message['role'] == 'tool' -%}
{%- set tool_call_id = message.tool_call_id -%}
## Return of {{ tool_call_id }}
{{render_content(message)}}
{%- elif message['content'] is not none -%}
{{render_content(message)}}
{%- endif -%}
<|im_end|>
{%- endfor -%}


{%- if add_generation_prompt -%}
<|im_assistant|>assistant<|im_middle|>
{%- endif -%}