server : support preserving reasoning_content in assistant message #18994
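For orientation, a rough sketch of the kind of request this feature concerns: an OpenAI-compatible `/v1/chat/completions` body whose history contains an assistant turn carrying a `reasoning_content` field next to `content`. The payload below is illustrative only (not from the PR); it is built with nlohmann::json to match the codebase.

```cpp
// Illustrative only: an OAI-compatible request body where an earlier
// assistant turn keeps its reasoning_content in the chat history.
#include <nlohmann/json.hpp>
#include <iostream>

int main() {
    using json = nlohmann::ordered_json;

    json messages = json::array({
        { {"role", "user"},      {"content", "What is 12 * 7?"} },
        { {"role", "assistant"},
          {"content", "84"},
          // the field this PR is about: reasoning kept alongside the answer
          {"reasoning_content", "12 * 7 = (10 + 2) * 7 = 70 + 14 = 84"} },
        { {"role", "user"},      {"content", "Now divide that by 3."} },
    });

    json body = {
        {"messages", messages},
        {"stream",   false},
    };

    std::cout << body.dump(2) << std::endl;  // what the server would receive
}
```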
First changed file (chat-type declarations):

```cpp
@@ -10,6 +10,8 @@
#include <vector>
#include <map>

#include <nlohmann/json_fwd.hpp>

struct common_chat_templates;

struct common_chat_tool_call {

@@ -26,6 +28,11 @@ struct common_chat_msg_content_part {
    std::string type;
    std::string text;

    // TODO @ngxson : no known chat templates support reasoning_content in content parts yet
    // this can be useful for models with interleaved thinking (like Kimi-K2)
    // if you see any templates explicitly support this, please ping me
    // std::string reasoning_content;
```
> **Collaborator** (on the TODO above): I guess you could argue that GPT-OSS does, but don't know if anyone properly supports that.
```cpp
    bool operator==(const common_chat_msg_content_part & other) const {
        return type == other.type && text == other.text;
    }
```
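To make the TODO concrete: with typed content parts, interleaved thinking would mean an assistant message whose `content` is an array of parts in which reasoning segments alternate with ordinary text. No template is known to consume this yet, so the shape below, including the `"reasoning"` part type, is purely hypothetical.

```cpp
// Hypothetical shape only: no chat template is known to consume a
// "reasoning" content part yet (see the TODO in the diff above).
#include <nlohmann/json.hpp>
#include <iostream>

int main() {
    using json = nlohmann::ordered_json;

    json assistant_msg = {
        {"role", "assistant"},
        {"content", json::array({
            { {"type", "reasoning"}, {"text", "First check the cache..."} },      // hypothetical part type
            { {"type", "text"},      {"text", "Looking that up."} },
            { {"type", "reasoning"}, {"text", "Cache miss, fall back to disk."} } // interleaved thinking
        })},
    };

    std::cout << assistant_msg.dump(2) << std::endl;
}
```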
```cpp
@@ -40,7 +47,7 @@ struct common_chat_msg {
    std::string tool_name;
    std::string tool_call_id;

    template <class T> T to_json_oaicompat() const;
    nlohmann::ordered_json to_json_oaicompat(bool concat_typed_text = false) const;

    bool empty() const {
        return content.empty() && content_parts.empty() && tool_calls.empty() && reasoning_content.empty() && tool_name.empty() && tool_call_id.empty();

@@ -232,13 +239,13 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates);

// Parses a JSON array of messages in OpenAI's chat completion API format.
// T can be std::string containing JSON or nlohmann::ordered_json
template <class T> std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const T & messages);
template <class T> T common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msgs, bool concat_typed_text = false);
std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const nlohmann::ordered_json & messages);
nlohmann::ordered_json common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msgs, bool concat_typed_text = false);

std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const nlohmann::ordered_json & tools);
nlohmann::ordered_json common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & tools);

// Parses a JSON array of tools in OpenAI's chat completion tool call API format.
// T can be std::string containing JSON or nlohmann::ordered_json
template <class T> std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const T & tools);
template <class T> T common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & tools);
nlohmann::ordered_json common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff);

template <class T> T common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff);
// get template caps, useful for reporting to server /props endpoint
std::map<std::string, bool> common_chat_templates_get_caps(const common_chat_templates * chat_templates);
```
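The comment on `common_chat_templates_get_caps` points at the server's `/props` endpoint. A hedged sketch of how such a map could be folded into a `/props`-style JSON response follows; the `chat_template_caps` key and the surrounding layout are assumptions, only the `std::map<std::string, bool>` type matches the declaration above.

```cpp
// Sketch: turning the caps map into a /props-style JSON fragment.
// The "chat_template_caps" key and overall layout are assumptions.
#include <map>
#include <string>
#include <iostream>
#include <nlohmann/json.hpp>

int main() {
    using json = nlohmann::ordered_json;

    std::map<std::string, bool> caps = {
        {"supports_preserve_reasoning", true},   // sample values, not real detection output
        {"supports_system_role",        true},
        {"requires_typed_content",      false},
    };

    json props;
    props["chat_template_caps"] = json::object();
    for (const auto & [name, supported] : caps) {
        props["chat_template_caps"][name] = supported;
    }

    std::cout << props.dump(2) << std::endl;
}
```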
Second changed file (template capability detection):

```cpp
@@ -61,14 +61,23 @@ static void caps_print_stats(value & v, const std::string & path) {
        ops.c_str());
}

std::map<std::string, bool> caps::to_map() const {
    return {
        {"requires_typed_content", requires_typed_content},
        {"supports_tools", supports_tools},
        {"supports_tool_calls", supports_tool_calls},
        {"supports_parallel_tool_calls", supports_parallel_tool_calls},
        {"supports_system_role", supports_system_role},
        {"supports_preserve_reasoning", supports_preserve_reasoning},
    };
}

std::string caps::to_string() const {
    std::ostringstream ss;
    ss << "Caps(\n";
    ss << " requires_typed_content=" << requires_typed_content << "\n";
    ss << " supports_tools=" << supports_tools << "\n";
    ss << " supports_tool_calls=" << supports_tool_calls << "\n";
    ss << " supports_parallel_tool_calls=" << supports_parallel_tool_calls << "\n";
    ss << " supports_system_role=" << supports_system_role << "\n";
    for (const auto & [key, value] : to_map()) {
        ss << " " << key << "=" << (value ? "true" : "false") << "\n";
    }
    ss << ")";
    return ss.str();
}
```
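Presumably the point of exposing `supports_preserve_reasoning` is to let the server decide whether to keep or drop `reasoning_content` from incoming assistant turns before rendering the template. The sketch below is not the PR's server code, just one way such a decision could look, using a stand-in message type.

```cpp
// Sketch only (not the PR's server logic): drop reasoning_content from
// history when the loaded template cannot preserve it.
#include <string>
#include <vector>

struct msg {                       // stand-in for the real chat message type
    std::string role;
    std::string content;
    std::string reasoning_content;
};

static void sanitize_history(std::vector<msg> & history, bool supports_preserve_reasoning) {
    if (supports_preserve_reasoning) {
        return;                    // the template can re-insert the reasoning itself
    }
    for (auto & m : history) {
        if (m.role == "assistant") {
            m.reasoning_content.clear();   // avoid passing text the template would ignore
        }
    }
}

int main() {
    std::vector<msg> history = {
        {"user", "hi", ""},
        {"assistant", "hello", "greet the user briefly"},
    };
    sanitize_history(history, /*supports_preserve_reasoning=*/false);
}
```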
```cpp
@@ -229,6 +238,40 @@ caps caps_get(jinja::program & prog) {
        }
    );

    // case: preserve reasoning content in chat history
    caps_try_execute(
        prog,
        [&]() {
            // messages
            return json::array({
                {
                    {"role", "user"},
                    {"content", "User message"}
                },
                {
                    {"role", "assistant"},
                    {"content", "Assistant message"},
                    {"reasoning_content", "Reasoning content"}
                },
```
> **Comment on lines +251 to +255**
>
> **Collaborator:** Might need a couple more capability checks for […]. The current logic for these models transforms […]
>
> **Collaborator (Author):** for gpt-oss, it seems like reasoning is only allowed to be added if […]
>
> **Collaborator:** Line 293: […]
```cpp
                {
                    {"role", "user"},
                    {"content", "User message"}
                },
            });
        },
        [&]() {
            // tools
            return json::array();
        },
        [&](bool, value & messages, value &) {
            auto & content = messages->at(1)->at("reasoning_content");
            caps_print_stats(content, "messages[1].reasoning_content");
            if (content->stats.used) {
                result.supports_preserve_reasoning = true;
            }
        }
    );

    JJ_DEBUG("%s\n", result.to_string().c_str());

    return result;
```
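The detection above works by rendering a fixed conversation through the template and checking whether the `reasoning_content` value was actually consumed (`content->stats.used`). The self-contained sketch below illustrates the same probe idea, but with two simplifications: a trivial stand-in "renderer" instead of the real Jinja program, and a sentinel-substring check instead of the engine's access tracking.

```cpp
// Stand-alone sketch of the probe idea: render a canned conversation and
// check whether the assistant's reasoning_content shows up in the output.
// The "renderer" is a stand-in; the real code tracks value accesses
// (stats.used) rather than searching the rendered text.
#include <functional>
#include <iostream>
#include <string>
#include <vector>

struct probe_msg {
    std::string role;
    std::string content;
    std::string reasoning_content;
};

using renderer = std::function<std::string(const std::vector<probe_msg> &)>;

static bool probe_preserve_reasoning(const renderer & render) {
    const std::string marker = "Reasoning content";   // unique value we can look for
    std::vector<probe_msg> msgs = {
        {"user",      "User message",      ""},
        {"assistant", "Assistant message", marker},
        {"user",      "User message",      ""},
    };
    // If the sentinel appears in the rendered prompt, the "template" used it.
    return render(msgs).find(marker) != std::string::npos;
}

int main() {
    renderer keeps_reasoning = [](const std::vector<probe_msg> & msgs) {
        std::string out;
        for (const auto & m : msgs) {
            out += m.role + ": " + m.reasoning_content + m.content + "\n";
        }
        return out;
    };
    renderer drops_reasoning = [](const std::vector<probe_msg> & msgs) {
        std::string out;
        for (const auto & m : msgs) {
            out += m.role + ": " + m.content + "\n";
        }
        return out;
    };

    std::cout << "keeps: " << probe_preserve_reasoning(keeps_reasoning) << "\n";   // prints 1
    std::cout << "drops: " << probe_preserve_reasoning(drops_reasoning) << "\n";   // prints 0
}
```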
> **Collaborator:** Should just remove those at this point, we're not going back to Minja.