171 changes: 168 additions & 3 deletions common/chat.cpp
@@ -643,6 +643,7 @@ const char * common_chat_format_name(common_chat_format format) {
case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2";
case COMMON_CHAT_FORMAT_APERTUS: return "Apertus";
case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: return "LFM2 with JSON tools";
case COMMON_CHAT_FORMAT_KIMI_K2: return "Kimi K2";
default:
throw std::runtime_error("Unknown chat format");
}
@@ -698,7 +699,8 @@ static void parse_json_tool_calls(
const common_regex & close_regex,
const std::optional<common_regex> & block_close,
bool allow_raw_python = false,
const std::function<std::string(const common_chat_msg_parser::find_regex_result & fres)> & get_function_name = nullptr) {
const std::function<std::string(const common_chat_msg_parser::find_regex_result & fres)> & get_function_name = nullptr,
const std::function<std::string(const common_chat_msg_parser::find_regex_result & fres)> & get_function_id = nullptr) {

auto parse_tool_calls = [&]() {
size_t from = std::string::npos;
@@ -713,12 +715,18 @@

if (res) {
std::string name;
std::string id;
if (get_function_name) {
name = get_function_name(*res);
} else {
GGML_ASSERT(res->groups.size() == 2);
name = builder.str(res->groups[1]);
}
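// Optional hook for formats that encode a call id in the opening tag
// (e.g. Kimi K2's "functions.NAME:IDX"); formats without ids leave it empty.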
if (get_function_id) {
id = get_function_id(*res);
} else {
id = "";
}
first = false;
if (name.empty()) {
// get_function_name signalled us that we should skip this match and treat it as content.
@@ -730,7 +738,7 @@
auto maybe_raw_python = name == "python" && allow_raw_python;
if (builder.input()[builder.pos()] == '{' || !maybe_raw_python) {
if (auto arguments = builder.try_consume_json_with_dumped_args({{}})) {
if (!builder.add_tool_call(name, "", arguments->value) || arguments->is_partial) {
if (!builder.add_tool_call(name, id, arguments->value) || arguments->is_partial) {
throw common_chat_msg_partial_exception("incomplete tool call");
}
builder.consume_regex(close_regex);
@@ -739,7 +747,7 @@
}
if (maybe_raw_python) {
auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
if (!builder.add_tool_call(name, "", arguments)) {
if (!builder.add_tool_call(name, id, arguments)) {
throw common_chat_msg_partial_exception("incomplete tool call");
}
return;
@@ -1726,6 +1734,69 @@ static common_chat_params common_chat_params_init_deepseek_v3_1(const common_cha
return data;
}

static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template & tmpl, const struct templates_params & inputs) {
common_chat_params data;

// Pass thinking context for Kimi K2 template
json additional_context = {
{"thinking", inputs.enable_thinking},
};

auto prompt = apply(tmpl, inputs,
/* messages_override= */ inputs.messages,
/* tools_override= */ std::nullopt,
additional_context);
data.prompt = prompt;
data.format = COMMON_CHAT_FORMAT_KIMI_K2;
if (string_ends_with(data.prompt, "<think>")) {
if (!inputs.enable_thinking) {
data.prompt += "</think>";
} else {
data.thinking_forced_open = true;
}
}
if (inputs.tools.is_array() && !inputs.tools.empty()) {
data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
// https://github.com/MoonshotAI/Kimi-K2/blob/main/docs/tool_call_guidance.md
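// The rules below constrain sampled output to the Kimi K2 tool-call wire format,
// e.g. (function name and arguments here are illustrative, not from the docs):
//   <|tool_calls_section_begin|><|tool_call_begin|>functions.get_weather:0<|tool_call_argument_begin|>{"city": "Tokyo"}<|tool_call_end|><|tool_calls_section_end|>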
std::vector<std::string> tool_rules;
foreach_function(inputs.tools, [&](const json & tool) {
const auto number = builder.add_rule("number", "[0-9]+");
const auto & function = tool.at("function");
std::string name = function.at("name");
auto parameters = function.at("parameters");
builder.resolve_refs(parameters);
tool_rules.push_back(builder.add_rule(name + "-call",
"\"<|tool_call_begin|>functions." + name + ":\" " + number + " \"<|tool_call_argument_begin|>"
"\" " + builder.add_schema(name + "-args", parameters) + " "
"\"<|tool_call_end|>\""));
});
builder.add_rule("root",
std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
"( \"<|tool_calls_section_begin|>\" ) "
"(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
"\"<|tool_calls_section_end|>\"");
data.grammar_triggers.push_back({
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
// If thinking_forced_open, then we capture the </think> tag in the grammar,
// (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") +
"(<\\|tool_calls_section_begin\\|>)[\\s\\S]*"
});
data.preserved_tokens = {
"<think>",
"</think>",
"<|tool_calls_section_begin|>",
"<|tool_call_begin|>",
"<|tool_call_argument_begin|>",
"<|tool_call_end|>",
"<|tool_calls_section_end|>",
};
});
}
return data;
}

static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
builder.try_parse_reasoning("<think>", "</think>");
if (!builder.syntax().parse_tool_calls) {
Expand Down Expand Up @@ -1807,6 +1878,91 @@ static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
}
}

static void common_chat_parse_kimi_k2_content(common_chat_msg_parser & builder) {
static const common_regex function_regex("(?:<\\|tool_call_begin\\|>)([\\w\\.]+:\\d+)\\s*(?:<\\|tool_call_argument_begin\\|>)");
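// Group 1 captures the raw call id, e.g. "functions.get_weather:0" (function name illustrative).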

static const common_regex close_regex("\\s*<\\|tool_call_end\\|>");
static const common_regex tool_calls_begin("(?:<\\|tool_calls_section_begin\\|>)");
static const common_regex tool_calls_end("<\\|tool_calls_section_end\\|>");

if (!builder.syntax().parse_tool_calls) {
LOG_DBG("%s: not parse_tool_calls\n", __func__);
builder.add_content(builder.consume_rest());
return;
}

LOG_DBG("%s: parse_tool_calls\n", __func__);

parse_json_tool_calls(
builder,
/* block_open= */ tool_calls_begin,
/* function_regex_start_only= */ std::nullopt,
function_regex,
close_regex,
tool_calls_end,
/* allow_raw_python */ false,
/* get_function_name= */ [&](const auto & res) -> std::string {
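// The raw id has the form "functions.NAME:IDX"; strip everything up to the
// first '.' and the ":IDX" suffix to recover the bare function name.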
auto function_id = builder.str(res.groups[1]);

auto dot_pos = function_id.find(".");
if (dot_pos == std::string::npos) {
return "";
}

auto colon_pos = function_id.find(':', dot_pos + 1);
if (colon_pos == std::string::npos)
return function_id.substr(dot_pos + 1);
else
return function_id.substr(dot_pos + 1, colon_pos - (dot_pos + 1));
},
/* get_function_id= */ [&](const auto & res) -> std::string {
auto function_id = builder.str(res.groups[1]);

auto dot_pos = function_id.find(".");
if (dot_pos == std::string::npos) {
return "";
}
return function_id;
}
);
}

static void common_chat_parse_kimi_k2(common_chat_msg_parser & builder) {
// Kimi K2 outputs reasoning content between "<think>" and "</think>" tags, followed by regular content.
// First try to parse using the standard reasoning parsing method.
LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str());

auto start_pos = builder.pos();
auto found_end_think = builder.try_find_literal("</think>");
builder.move_to(start_pos);

if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) {
LOG_DBG("%s: no end_think, not partial, adding content\n", __func__);
common_chat_parse_kimi_k2_content(builder);
} else if (builder.try_parse_reasoning("<think>", "</think>")) {
// If reasoning was parsed successfully, the remaining content is regular content
LOG_DBG("%s: parsed reasoning, adding content\n", __func__);
// </think><|tool_calls_section_begin|><|tool_call_begin|>functions.NAME:IDX<|tool_call_argument_begin|>JSON<|tool_call_end|><|tool_calls_section_end|>
common_chat_parse_kimi_k2_content(builder);
} else {
if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
LOG_DBG("%s: reasoning_format none, adding content\n", __func__);
common_chat_parse_kimi_k2_content(builder);
return;
}
// If no reasoning tags found, check if we should treat everything as reasoning
if (builder.syntax().thinking_forced_open) {
// If thinking is forced open but no tags found, treat everything as reasoning
LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__);
builder.add_reasoning_content(builder.consume_rest());
} else {
LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__);
// <|tool_call_begin|>functions.NAME:IDX<|tool_call_argument_begin|>JSON<|tool_call_end|>
common_chat_parse_kimi_k2_content(builder);
}
}
}

static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) {
common_chat_params data;

@@ -2912,6 +3068,12 @@ static common_chat_params common_chat_templates_apply_jinja(
return common_chat_params_init_deepseek_v3_1(tmpl, params);
}

// Kimi K2: detect based on specific patterns in the template
if (src.find("<|tool_calls_section_begin|>") != std::string::npos &&
params.json_schema.is_null()) {
return common_chat_params_init_kimi_k2(tmpl, params);
}

// DeepSeek R1: use handler in all cases except json schema (thinking / tools).
if (src.find("<|tool▁calls▁begin|>") != std::string::npos && params.json_schema.is_null()) {
return common_chat_params_init_deepseek_r1(tmpl, params);
@@ -3139,6 +3301,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS:
common_chat_parse_lfm2(builder);
break;
case COMMON_CHAT_FORMAT_KIMI_K2:
common_chat_parse_kimi_k2(builder);
break;
default:
throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
}
1 change: 1 addition & 0 deletions common/chat.h
@@ -117,6 +117,7 @@ enum common_chat_format {
COMMON_CHAT_FORMAT_NEMOTRON_V2,
COMMON_CHAT_FORMAT_APERTUS,
COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS,
COMMON_CHAT_FORMAT_KIMI_K2,

COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
};
97 changes: 97 additions & 0 deletions models/templates/moonshotai-Kimi-K2-Thinking.jinja
@@ -0,0 +1,97 @@
{%- macro render_content(msg) -%}
{%- set c = msg.get('content') -%}
{%- if c is string -%}
{{ c }}
{%- elif c is not none -%}
{% for content in c -%}
{% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
<|media_start|>image<|media_content|><|media_pad|><|media_end|>
{% else -%}
{{ content['text'] }}
{%- endif -%}
{%- endfor -%}
{%- endif -%}
{%- endmacro -%}

{% macro set_roles(message) -%}
{%- set role_name = message.get('name') or message['role'] -%}
{%- if message['role'] == 'user' -%}
<|im_user|>{{role_name}}<|im_middle|>
{%- elif message['role'] == 'assistant' -%}
<|im_assistant|>{{role_name}}<|im_middle|>
{%- else -%}
<|im_system|>{{role_name}}<|im_middle|>
{%- endif -%}
{%- endmacro -%}


{%- macro render_toolcalls(message) -%}
<|tool_calls_section_begin|>
{%- for tool_call in message['tool_calls'] -%}
{%- set formatted_id = tool_call['id'] -%}
<|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{% if tool_call['function']['arguments'] is string %}{{ tool_call['function']['arguments'] }}{% else %}{{ tool_call['function']['arguments'] | tojson }}{% endif %}<|tool_call_end|>
{%- endfor -%}
<|tool_calls_section_end|>
{%- endmacro -%}
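{#- Example rendered output of this macro (id, name and arguments illustrative):
    <|tool_calls_section_begin|><|tool_call_begin|>functions.get_weather:0<|tool_call_argument_begin|>{"city": "Tokyo"}<|tool_call_end|><|tool_calls_section_end|> -#}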


{# Find last non-tool-call assistant message #}
{%- set ns = namespace(last_non_tool_call_assistant_msg=-1) -%}
{%- for idx in range(messages|length-1, -1, -1) -%}
{%- if messages[idx]['role'] == 'assistant' and not messages[idx].get('tool_calls') -%}
{%- set ns.last_non_tool_call_assistant_msg = idx -%}
{%- break -%}
{%- endif -%}
{%- endfor -%}

{# Split all messages into history & suffix; reasoning_content in suffix messages should be preserved. #}
{%- set hist_msgs = messages[:ns.last_non_tool_call_assistant_msg+1] -%}
{%- set suffix_msgs = messages[ns.last_non_tool_call_assistant_msg+1:] -%}

{%- if tools -%}
<|im_system|>tool_declare<|im_middle|>{{ tools | tojson() }}<|im_end|>
{%- endif -%}

{%- if messages|length == 0 or messages[0]['role'] != 'system' -%}
<|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|>
{%- endif -%}

{%- for message in hist_msgs -%}
{{set_roles(message)}}
{%- if message['role'] == 'assistant' -%}
<think></think>{{render_content(message)}}
{%- if message.get('tool_calls') -%}
{{render_toolcalls(message)}}
{%- endif -%}
{%- elif message['role'] == 'tool' -%}
{%- set tool_call_id = message.tool_call_id -%}
## Return of {{ tool_call_id }}
{{render_content(message)}}
{%- elif message['content'] is not none -%}
{{render_content(message)}}
{%- endif -%}
<|im_end|>
{%- endfor -%}

{%- for message in suffix_msgs -%}
{{set_roles(message)}}
{%- if message['role'] == 'assistant' -%}
{%- set rc = message.get('reasoning_content', '') -%}
<think>{{rc}}</think>{{render_content(message)}}
{%- if message.get('tool_calls') -%}
{{render_toolcalls(message)}}
{%- endif -%}
{%- elif message['role'] == 'tool' -%}
{%- set tool_call_id = message.tool_call_id -%}
## Return of {{ tool_call_id }}
{{render_content(message)}}
{%- elif message['content'] is not none -%}
{{render_content(message)}}
{%- endif -%}
<|im_end|>
{%- endfor -%}


{%- if add_generation_prompt -%}
<|im_assistant|>assistant<|im_middle|>
{%- endif -%}