Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions common/chat-parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1630,7 +1630,7 @@ common_chat_msg common_chat_parse(const std::string & input, bool is_partial, co
}
auto msg = builder.result();
if (!is_partial) {
LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat<json>({msg}).at(0).dump().c_str());
LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
}
return msg;
}
Expand Down Expand Up @@ -1663,7 +1663,7 @@ common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std
mapper.from_ast(ctx.ast, result);
}
if (!is_partial) {
LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat<json>({msg}).at(0).dump().c_str());
LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
}
return msg;
}
173 changes: 71 additions & 102 deletions common/chat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@
#include "log.h"
#include "regex-partial.h"

// #include <minja/chat-template.hpp>
// #include <minja/minja.hpp>

#include "jinja/parser.h"
#include "jinja/value.h"
#include "jinja/runtime.h"
Expand Down Expand Up @@ -56,39 +53,73 @@ static bool has_content_or_tool_calls(const common_chat_msg & msg) {
return !msg.content.empty() || !msg.tool_calls.empty();
}

template <>
json common_chat_msg::to_json_oaicompat() const
{
json message {
{"role", "assistant"},
json common_chat_msg::to_json_oaicompat(bool concat_typed_text) const {
if (!content.empty() && !content_parts.empty()) {
throw std::runtime_error("Cannot specify both content and content_parts");
}
json jmsg {
{"role", role},
};
if (!content.empty()) {
jmsg["content"] = content;
} else if (!content_parts.empty()) {
if (concat_typed_text) {
std::string text;
for (const auto & part : content_parts) {
if (part.type != "text") {
LOG_WRN("Ignoring content part type: %s\n", part.type.c_str());
continue;
}
if (!text.empty()) {
text += '\n';
}
text += part.text;
}
jmsg["content"] = text;
} else {
auto & parts = jmsg["content"] = json::array();
for (const auto & part : content_parts) {
parts.push_back({
{"type", part.type},
{"text", part.text},
});
}
}
} else {
jmsg["content"] = "";
}
if (!reasoning_content.empty()) {
message["reasoning_content"] = reasoning_content;
jmsg["reasoning_content"] = reasoning_content;
}
if (content.empty() && !tool_calls.empty()) {
message["content"] = json();
} else {
message["content"] = content;
if (!tool_name.empty()) {
jmsg["name"] = tool_name;
}
if (!tool_call_id.empty()) {
jmsg["tool_call_id"] = tool_call_id;
}
if (!tool_calls.empty()) {
auto arr = json::array();
for (const auto & tc : tool_calls) {
arr.push_back({
jmsg["tool_calls"] = json::array();
auto & jtool_calls = jmsg["tool_calls"];
for (const auto & tool_call : tool_calls) {
json tc {
{"type", "function"},
{"function", {
{"name", tc.name},
{"arguments", tc.arguments},
{"name", tool_call.name},
{"arguments", tool_call.arguments},
}},
{"id", tc.id},
// // Some templates generate and require an id (sometimes in a very specific format, e.g. Mistral Nemo).
// // We only generate a random id for the ones that don't generate one by themselves
// // (they also won't get to see it as their template likely doesn't use it, so it's all for the client)
// {"id", tc.id.empty() ? gen_tool_call_id() : tc.id},
});
};
if (!tool_call.id.empty()) {
tc["id"] = tool_call.id;
}
// Some templates generate and require an id (sometimes in a very specific format, e.g. Mistral Nemo).
// We only generate a random id for the ones that don't generate one by themselves
// (they also won't get to see it as their template likely doesn't use it, so it's all for the client)
// {"id", tc.id.empty() ? gen_tool_call_id() : tc.id},
jtool_calls.push_back(tc);
}
message["tool_calls"] = arr;
}
return message;

return jmsg;
}

std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new) {
Expand Down Expand Up @@ -256,7 +287,6 @@ bool common_chat_templates_support_enable_thinking(const common_chat_templates *
return rendered_no_thinking.prompt != rendered_with_thinking.prompt;
}

template <>
std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messages) {
std::vector<common_chat_msg> msgs;

Expand Down Expand Up @@ -350,80 +380,15 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
return msgs;
}

template <>
json common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msgs, bool concat_typed_text) {
json messages = json::array();
for (const auto & msg : msgs) {
if (!msg.content.empty() && !msg.content_parts.empty()) {
throw std::runtime_error("Cannot specify both content and content_parts");
}
json jmsg {
{"role", msg.role},
};
if (!msg.content.empty()) {
jmsg["content"] = msg.content;
} else if (!msg.content_parts.empty()) {
if (concat_typed_text) {
std::string text;
for (const auto & part : msg.content_parts) {
if (part.type != "text") {
LOG_WRN("Ignoring content part type: %s\n", part.type.c_str());
continue;
}
if (!text.empty()) {
text += '\n';
}
text += part.text;
}
jmsg["content"] = text;
} else {
auto & parts = jmsg["content"] = json::array();
for (const auto & part : msg.content_parts) {
parts.push_back({
{"type", part.type},
{"text", part.text},
});
}
}
} else {
jmsg["content"] = "";
}
if (!msg.reasoning_content.empty()) {
jmsg["reasoning_content"] = msg.reasoning_content;
}
if (!msg.tool_name.empty()) {
jmsg["name"] = msg.tool_name;
}
if (!msg.tool_call_id.empty()) {
jmsg["tool_call_id"] = msg.tool_call_id;
}
if (!msg.tool_calls.empty()) {
auto & tool_calls = jmsg["tool_calls"] = json::array();
for (const auto & tool_call : msg.tool_calls) {
json tc {
{"type", "function"},
{"function", {
{"name", tool_call.name},
{"arguments", tool_call.arguments},
}},
};
if (!tool_call.id.empty()) {
tc["id"] = tool_call.id;
}
tool_calls.push_back(tc);
}
}
json jmsg = msg.to_json_oaicompat(concat_typed_text);
messages.push_back(jmsg);
}
return messages;
}

template <>
std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const std::string & messages) {
return common_chat_msgs_parse_oaicompat(json::parse(messages));
}

template <>
std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const json & tools) {
std::vector<common_chat_tool> result;

Expand Down Expand Up @@ -459,12 +424,6 @@ std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const json & too
return result;
}

template <>
std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const std::string & tools) {
return common_chat_tools_parse_oaicompat(json::parse(tools));
}

template <>
json common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & tools) {
if (tools.empty()) {
return json();
Expand All @@ -484,7 +443,7 @@ json common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & t
return result;
}

template <> json common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff) {
json common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff) {
json delta = json::object();
if (!diff.reasoning_content_delta.empty()) {
delta["reasoning_content"] = diff.reasoning_content_delta;
Expand Down Expand Up @@ -2867,13 +2826,13 @@ static common_chat_params common_chat_templates_apply_jinja(
const struct common_chat_templates_inputs & inputs)
{
templates_params params;
params.tools = common_chat_tools_to_json_oaicompat<json>(inputs.tools);
params.tools = common_chat_tools_to_json_oaicompat(inputs.tools);
const auto & tmpl = params.tools.is_array() && tmpls->template_tool_use
? *tmpls->template_tool_use
: *tmpls->template_default;
const auto & src = tmpl.source();
const auto & caps = tmpl.original_caps();
params.messages = common_chat_msgs_to_json_oaicompat<json>(inputs.messages, /* concat_text= */ !tmpl.original_caps().requires_typed_content);
params.messages = common_chat_msgs_to_json_oaicompat(inputs.messages, /* concat_text= */ !tmpl.original_caps().requires_typed_content);
params.add_generation_prompt = inputs.add_generation_prompt;
params.tool_choice = inputs.tool_choice;
params.reasoning_format = inputs.reasoning_format;
Expand Down Expand Up @@ -2943,6 +2902,10 @@ static common_chat_params common_chat_templates_apply_jinja(
src.find("<arg_value>") != std::string::npos &&
params.json_schema.is_null()) {
workaround::func_args_not_string(params.messages);
if (!params.extra_context.contains("clear_thinking")) {
// by default, do not clear reasoning_content (added since GLM-4.7)
params.extra_context["clear_thinking"] = false;
}
return common_chat_params_init_glm_4_5(tmpl, params);
}

Expand Down Expand Up @@ -3174,3 +3137,9 @@ common_chat_params common_chat_templates_apply(
? common_chat_templates_apply_jinja(tmpls, inputs)
: common_chat_templates_apply_legacy(tmpls, inputs);
}

std::map<std::string, bool> common_chat_templates_get_caps(const common_chat_templates * chat_templates) {
GGML_ASSERT(chat_templates != nullptr);
GGML_ASSERT(chat_templates->template_default != nullptr);
return chat_templates->template_default->caps.to_map();
}
25 changes: 16 additions & 9 deletions common/chat.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#include <vector>
#include <map>

#include <nlohmann/json_fwd.hpp>

struct common_chat_templates;

struct common_chat_tool_call {
Expand All @@ -26,6 +28,11 @@ struct common_chat_msg_content_part {
std::string type;
std::string text;

// TODO @ngxson : no known chat templates support reasoning_content in content parts yet
// this can be useful for models with interleaved thinking (like Kimi-K2)
// if you see any templates explicitly support this, please ping me
// std::string reasoning_content;

bool operator==(const common_chat_msg_content_part & other) const {
return type == other.type && text == other.text;
}
Expand All @@ -40,7 +47,7 @@ struct common_chat_msg {
std::string tool_name;
std::string tool_call_id;

template <class T> T to_json_oaicompat() const;
nlohmann::ordered_json to_json_oaicompat(bool concat_typed_text = false) const;

bool empty() const {
return content.empty() && content_parts.empty() && tool_calls.empty() && reasoning_content.empty() && tool_name.empty() && tool_call_id.empty();
Expand Down Expand Up @@ -232,13 +239,13 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates);

// Parses a JSON array of messages in OpenAI's chat completion API format.
// T can be std::string containing JSON or nlohmann::ordered_json
template <class T> std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const T & messages);
template <class T> T common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msgs, bool concat_typed_text = false);
std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const nlohmann::ordered_json & messages);
nlohmann::ordered_json common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msgs, bool concat_typed_text = false);

std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const nlohmann::ordered_json & tools);
nlohmann::ordered_json common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & tools);

// Parses a JSON array of tools in OpenAI's chat completion tool call API format.
// T can be std::string containing JSON or nlohmann::ordered_json
template <class T> std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const T & tools);
template <class T> T common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & tools);
nlohmann::ordered_json common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff);

template <class T> T common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff);
// get template caps, useful for reporting to server /props endpoint
std::map<std::string, bool> common_chat_templates_get_caps(const common_chat_templates * chat_templates);
53 changes: 48 additions & 5 deletions common/jinja/caps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,23 @@ static void caps_print_stats(value & v, const std::string & path) {
ops.c_str());
}

std::map<std::string, bool> caps::to_map() const {
return {
{"requires_typed_content", requires_typed_content},
{"supports_tools", supports_tools},
{"supports_tool_calls", supports_tool_calls},
{"supports_parallel_tool_calls", supports_parallel_tool_calls},
{"supports_system_role", supports_system_role},
{"supports_preserve_reasoning", supports_preserve_reasoning},
};
}

std::string caps::to_string() const {
std::ostringstream ss;
ss << "Caps(\n";
ss << " requires_typed_content=" << requires_typed_content << "\n";
ss << " supports_tools=" << supports_tools << "\n";
ss << " supports_tool_calls=" << supports_tool_calls << "\n";
ss << " supports_parallel_tool_calls=" << supports_parallel_tool_calls << "\n";
ss << " supports_system_role=" << supports_system_role << "\n";
for (const auto & [key, value] : to_map()) {
ss << " " << key << "=" << (value ? "true" : "false") << "\n";
}
ss << ")";
return ss.str();
}
Expand Down Expand Up @@ -229,6 +238,40 @@ caps caps_get(jinja::program & prog) {
}
);

// case: preserve reasoning content in chat history
caps_try_execute(
prog,
[&]() {
// messages
return json::array({
{
{"role", "user"},
{"content", "User message"}
},
{
{"role", "assistant"},
{"content", "Assistant message"},
{"reasoning_content", "Reasoning content"}
},
{
{"role", "user"},
{"content", "User message"}
},
});
},
[&]() {
// tools
return json::array();
},
[&](bool, value & messages, value &) {
auto & content = messages->at(1)->at("reasoning_content");
caps_print_stats(content, "messages[1].reasoning_content");
if (content->stats.used) {
result.supports_preserve_reasoning = true;
}
}
);

JJ_DEBUG("%s\n", result.to_string().c_str());

return result;
Expand Down
Loading