Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ add_library(${TARGET} STATIC
json-schema-to-grammar.cpp
train.h
train.cpp
log.cpp
log.h
ngram-cache.h
ngram-cache.cpp
speculative.cpp
Expand Down
1,000 changes: 1,000 additions & 0 deletions common/chat-parser.cpp

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions common/chat-parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ class common_chat_msg_parser {
// Adds an array of tool calls using their "name", "id" and "arguments" fields.
bool add_tool_calls(const nlohmann::ordered_json & arr);

// Adds a tool call using the short form: { "tool_name": { "arg1": val, "arg2": val } }
bool add_tool_call_short_form(const nlohmann::ordered_json& tool_call);

void finish();

bool consume_spaces();
Expand Down
804 changes: 13 additions & 791 deletions common/chat.cpp

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions common/chat.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ enum common_chat_format {
COMMON_CHAT_FORMAT_CONTENT_ONLY,
COMMON_CHAT_FORMAT_GENERIC,
COMMON_CHAT_FORMAT_MISTRAL_NEMO,
COMMON_CHAT_FORMAT_MAGISTRAL,
COMMON_CHAT_FORMAT_LLAMA_3_X,
COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
COMMON_CHAT_FORMAT_DEEPSEEK_R1,
Expand All @@ -112,6 +113,10 @@ enum common_chat_format {
COMMON_CHAT_FORMAT_COMMAND_R7B,
COMMON_CHAT_FORMAT_GRANITE,
COMMON_CHAT_FORMAT_GPT_OSS,
COMMON_CHAT_FORMAT_SEED_OSS,
COMMON_CHAT_FORMAT_NEMOTRON_V2,
COMMON_CHAT_FORMAT_APERTUS,
COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS,
COMMON_CHAT_FORMAT_GLM_4_5,
COMMON_CHAT_FORMAT_MINIMAX_M2,
COMMON_CHAT_FORMAT_KIMI_K2,
Expand Down
191 changes: 20 additions & 171 deletions common/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2726,11 +2726,29 @@ bool fs_validate_filename(const std::string & filename) {
return true;
}

#ifdef _WIN32
// Convert a UTF-8 encoded string to a UTF-16 std::wstring via the Win32 API.
// Returns an empty wstring for empty input or if the conversion fails.
static std::wstring utf8_to_wstring(const std::string& str) {
    if (str.empty()) {
        return {};
    }

    const int src_len = (int)str.size();

    // first pass: ask for the required number of wide characters
    const int required = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), src_len, NULL, 0);
    if (required <= 0) {
        return {};
    }

    // second pass: perform the actual conversion into the sized buffer
    std::wstring result(required, 0);
    MultiByteToWideChar(CP_UTF8, 0, str.c_str(), src_len, &result[0], required);

    return result;
}
#endif

// returns true if successful, false otherwise
bool fs_create_directory_with_parents(const std::string & path) {
#ifdef _WIN32
std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
std::wstring wpath = converter.from_bytes(path);
std::wstring wpath = utf8_to_wstring(path);

// if the path already exists, check whether it's a directory
const DWORD attributes = GetFileAttributesW(wpath.c_str());
Expand Down Expand Up @@ -3586,175 +3604,6 @@ bool llama_should_add_bos_token(const llama_model * model) {
return add_bos != -1 ? bool(add_bos) : (llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
}

//
// Chat template utils
//
//
//bool llama_chat_verify_template(const struct llama_model* model, const std::string& tmpl, bool use_jinja) {
// if (use_jinja) {
// try {
// auto chat_template = common_chat_template(tmpl, "<s>", "</s>");
// common_chat_inputs inputs;
// inputs.messages = json::array({ {
// {"role", "user"},
// {"content", "test"},
// } });
// common_chat_params_init(chat_template, inputs);
// return true;
// }
// catch (const std::exception& e) {
// fprintf(stdout,"%s: failed to apply template: %s\n", __func__, e.what());
// return false;
// }
// }
// llama_chat_message chat[] = { {"user", "test"} };
// const int res = llama_chat_apply_template(model, tmpl.c_str(), chat, 1, true, nullptr, 0);
// return res >= 0;
//}

//std::string llama_chat_apply_template(const struct llama_model * model,
// const common_chat_template& tmpl,
// const std::vector<common_chat_msg> & msgs,
// bool add_ass,
// bool use_jinja) {
// if (use_jinja) {
// auto messages = json::array();
// for (const auto& msg : msgs) {
// messages.push_back({ {"role", msg.role}, {"content", msg.content} });
// }
// common_chat_inputs inputs;
// inputs.messages = messages;
// inputs.add_generation_prompt = add_ass;
// return common_chat_params_init(tmpl, inputs).prompt;
// }
// int alloc_size = 0;
// std::vector<llama_chat_message> chat;
// for (auto & msg : msgs) {
// chat.push_back({msg.role.c_str(), msg.content.c_str()});
// alloc_size += (msg.role.size() + msg.content.size()) * 1.25;
// }
//
// std::vector<char> buf(alloc_size);
//
// // run the first time to get the total output length
// int32_t res = llama_chat_apply_template(model, tmpl.source().c_str(), chat.data(), chat.size(), add_ass, buf.data(), buf.size());
// // error: chat template is not supported
// if (res < 0) {
// // if the custom "tmpl" is not supported, we throw an error
// // this is a bit redundant (for good), since we're not sure if user validated the custom template with llama_chat_verify_template()
// throw std::runtime_error("this custom template is not supported");
// }
//
// // if it turns out that our buffer is too small, we resize it
// if ((size_t)res > buf.size()) {
// buf.resize(res);
// res = llama_chat_apply_template(model, tmpl.source().c_str(), chat.data(), chat.size(), add_ass, buf.data(), buf.size());
// }
//
// std::string formatted_chat(buf.data(), res);
// return formatted_chat;
//}
////
//std::string llama_chat_format_single(const struct llama_model * model,
// const common_chat_template& tmpl,
// const std::vector<common_chat_msg> & past_msg,
// const common_chat_msg & new_msg,
// bool add_ass,
// bool use_jinja) {
// std::ostringstream ss;
// auto fmt_past_msg = past_msg.empty() ? "" : llama_chat_apply_template(model, tmpl, past_msg, false, use_jinja);
// std::vector<common_chat_msg> chat_new(past_msg);
// // if the past_msg ends with a newline, we must preserve it in the formatted version
// if (add_ass && !fmt_past_msg.empty() && fmt_past_msg.back() == '\n') {
// ss << "\n";
// };
// // format chat with new_msg
// chat_new.push_back(new_msg);
// auto fmt_new_msg = llama_chat_apply_template(model, tmpl, chat_new, add_ass, use_jinja);
// // get the diff part
// ss << fmt_new_msg.substr(fmt_past_msg.size(), fmt_new_msg.size() - fmt_past_msg.size());
// return ss.str();
//}

//std::string llama_chat_format_example(const struct llama_model * model, const common_chat_template& tmpl, bool use_jinja) {
// std::vector<common_chat_msg> msgs = {
// {"system", "You are a helpful assistant", {}},
// {"user", "Hello", {}},
// {"assistant", "Hi there", {}},
// {"user", "How are you?", {}},
// };
// return llama_chat_apply_template(model, tmpl, msgs, true, use_jinja);
//}
//
//#define CHATML_TEMPLATE_SRC \
// "{%- for message in messages -%}\n" \
// " {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' -}}\n" \
// "{%- endfor -%}\n" \
// "{%- if add_generation_prompt -%}\n" \
// " {{- '<|im_start|>assistant\n' -}}\n" \
// "{%- endif -%}"
//
//common_chat_templates llama_chat_templates_from_model(const struct llama_model* model, const std::string& chat_template_override)
//{
// std::string default_template_src;
// std::string template_tool_use_src;
// bool has_explicit_template = !chat_template_override.empty();
// if (chat_template_override.empty()) {
// auto str = llama_model_chat_template(model, /* name */ nullptr);
// if (str) {
// default_template_src = str;
// has_explicit_template = true;
// }
// str = llama_model_chat_template(model, /* name */ "tool_use");
// if (str) {
// template_tool_use_src = str;
// has_explicit_template = true;
// }
// }
// else {
// default_template_src = chat_template_override;
// }
// if (default_template_src.empty() || default_template_src == "chatml") {
// if (!template_tool_use_src.empty()) {
// default_template_src = template_tool_use_src;
// }
// else {
// default_template_src = CHATML_TEMPLATE_SRC;
// }
// }
// auto vocab = llama_model_get_vocab(model);
// const auto get_token = [&](llama_token token, const char* name, const char* jinja_variable_name) {
// if (token == LLAMA_TOKEN_NULL) {
// if (default_template_src.find(jinja_variable_name) != std::string::npos
// || template_tool_use_src.find(jinja_variable_name) != std::string::npos) {
// fprintf(stdout, "%s: warning: vocab does not have a %s token, jinja template won't work as intended.\n", __func__, name);
// }
// return std::string();
// }
// else {
// return llama_token_to_piece(model, token, true);
// }
// };
// auto token_bos = get_token(llama_token_bos_impl(*vocab), "BOS", "bos_token");
// auto token_eos = get_token(llama_token_eos_impl(*vocab), "EOS", "eos_token");
// try {
// return {
// has_explicit_template,
// std::make_unique<minja::chat_template>(default_template_src, token_bos, token_eos),
// template_tool_use_src.empty()
// ? nullptr
// : std::make_unique<minja::chat_template>(template_tool_use_src, token_bos, token_eos),
// };
// }
// catch (const std::exception& e) {
// LOG("%s: failed to parse chat template: %s\n", __func__, e.what());
// return {
// has_explicit_template,
// std::make_unique<minja::chat_template>(CHATML_TEMPLATE_SRC, token_bos, token_eos),
// nullptr,
// };
// }
//}

//
// KV cache utils
Expand Down
Loading