From 47134fc172e3fbab231cfdbcb153f7bdb3c8f031 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Tue, 30 Dec 2025 05:44:08 +0000 Subject: [PATCH 01/23] from previous PR --- requirements/requirements-tool_bench.txt | 2 +- tools/server/server-common.cpp | 60 +++++++ tools/server/server-common.h | 5 + tools/server/server-context.cpp | 87 ++++++++++- tools/server/server-context.h | 1 + tools/server/server-task.cpp | 147 ++++++++++++++++++ tools/server/server-task.h | 7 + tools/server/server.cpp | 2 + tools/server/tests/requirements.txt | 2 +- .../tests/unit/test_compat_oai_responses.py | 48 ++++++ 10 files changed, 352 insertions(+), 9 deletions(-) create mode 100644 tools/server/tests/unit/test_compat_oai_responses.py diff --git a/requirements/requirements-tool_bench.txt b/requirements/requirements-tool_bench.txt index f7912aff72..3bb74fb9d0 100644 --- a/requirements/requirements-tool_bench.txt +++ b/requirements/requirements-tool_bench.txt @@ -3,7 +3,7 @@ pytest~=8.3.3 huggingface_hub>=0.34.0,<1.0 matplotlib~=3.10.0 numpy~=1.26.4 -openai~=1.55.3 +openai~=2.14.0 pandas~=2.2.3 prometheus-client~=0.20.0 requests~=2.32.3 diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index 4aeeda2ffe..a615760954 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1069,6 +1069,48 @@ json oaicompat_chat_params_parse( return llama_params; } +json convert_responses_to_chatcmpl(const json & body) { + if (!body.contains("input")) { + throw std::invalid_argument("'input' is required"); + } + if (!json_value(body, "previous_response_id", std::string{}).empty()) { + throw std::invalid_argument("llama.cpp does not support 'previous_response_id'."); + } + + const json input_value = body.at("input"); + json chatcmpl_messages = json::array(); + + if (input_value.is_array()) { + chatcmpl_messages = input_value; + } else if (input_value.is_string()) { + chatcmpl_messages.push_back({ + {"role", "user"}, + {"content", input_value}, + }); + } 
else { + std::invalid_argument("'input' must be a string or array of objects"); + } + + const std::string instructions = json_value(body, "instructions", std::string{}); + if (instructions != "") { + chatcmpl_messages.push_back({ + {"role", "system"}, + {"content", instructions}, + }); + } + + json chatcmpl_body = body; + chatcmpl_body.erase("input"); + chatcmpl_body["messages"] = chatcmpl_messages; + + if (body.contains("max_output_tokens")) { + chatcmpl_body.erase("max_output_tokens"); + chatcmpl_body["max_tokens"] = body["max_output_tokens"]; + } + + return chatcmpl_body; +} + json convert_anthropic_to_oai(const json & body) { json oai_body; @@ -1482,6 +1524,24 @@ std::string format_oai_sse(const json & data) { return ss.str(); } +std::string format_oai_resp_sse(const json & data) { + std::ostringstream ss; + auto send_single = [&ss](const json & event_obj) { + ss << "event: " << event_obj.at("event").get() << "\n"; + ss << "data: " << safe_json_to_str(event_obj.at("data")) << "\n\n"; + }; + + if (data.is_array()) { + for (const auto & item : data) { + send_single(item); + } + } else { + send_single(data); + } + + return ss.str(); +} + std::string format_anthropic_sse(const json & data) { std::ostringstream ss; diff --git a/tools/server/server-common.h b/tools/server/server-common.h index a88d40494a..2629a6bee9 100644 --- a/tools/server/server-common.h +++ b/tools/server/server-common.h @@ -294,6 +294,9 @@ json oaicompat_chat_params_parse( const server_chat_params & opt, std::vector & out_files); +// convert OpenAI Responses API format to OpenAI Chat Completions API format +json convert_responses_to_chatcmpl(const json & body); + // convert Anthropic Messages API format to OpenAI Chat Completions API format json convert_anthropic_to_oai(const json & body); @@ -331,6 +334,8 @@ std::string tokens_to_output_formatted_string(const llama_context * ctx, const l // note: if data is a json array, it will be sent as multiple events, one per item std::string 
format_oai_sse(const json & data); +std::string format_oai_resp_sse(const json & data); + // format Anthropic-style SSE with event types std::string format_anthropic_sse(const json & data); diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index f1f677addd..c84e36c4a6 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -3073,6 +3073,58 @@ std::unique_ptr server_routes::handle_completions_impl( json first_result_json = first_result->to_json(); if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) { res->data = format_anthropic_sse(first_result_json); + } else if (res_type == TASK_RESPONSE_TYPE_OAI_RESP) { + const json created = { + {"event", "response.created"}, + {"data", json { + {"type", "response.created"}, + {"response", json { + {"object", "response"}, + {"status", "in_progress"} + }} + }} + }; + const json in_progress = { + {"event", "response.in_progress"}, + {"data", json { + {"type", "response.in_progress"}, + {"response", json { + {"object", "response"}, + {"status", "in_progress"} + }} + }} + }; + const json output_item_added = { + {"event", "response.output_item.added"}, + {"data", json { + {"type", "response.output_item.added"}, + {"item", json { + {"type", "message"}, + {"status", "in_progress"}, + {"content", json::array()}, + {"role", "assistant"} + }} + }} + }; + const json content_part_added = { + {"event", "response.content_part.added"}, + {"data", json { + {"type", "response.content_part.added"}, + {"part", json { + {"type", "output_text"}, + {"text", ""} + }} + }} + }; + + const json initial_events = json::array({ + created, + in_progress, + output_item_added, + content_part_added + }); + + res->data = format_oai_resp_sse(initial_events) + format_oai_resp_sse(first_result_json); } else { res->data = format_oai_sse(first_result_json); } @@ -3107,13 +3159,16 @@ std::unique_ptr server_routes::handle_completions_impl( // check if there is more data if (!rd.has_next()) { - if (res_type == 
TASK_RESPONSE_TYPE_ANTHROPIC) { - // Anthropic doesn't send [DONE], message_stop was already sent - output = ""; - } else if (res_type != TASK_RESPONSE_TYPE_NONE) { - output = "data: [DONE]\n\n"; - } else { - output = ""; + switch (res_type) { + case TASK_RESPONSE_TYPE_NONE: + case TASK_RESPONSE_TYPE_OAI_RESP: + case TASK_RESPONSE_TYPE_ANTHROPIC: + output = ""; + break; + + default: + output = "data: [DONE]\n\n"; + break; } SRV_DBG("%s", "all results received, terminating stream\n"); return false; // no more data, terminate @@ -3141,6 +3196,8 @@ std::unique_ptr server_routes::handle_completions_impl( json res_json = result->to_json(); if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) { output = format_anthropic_sse(res_json); + } else if (res_type == TASK_RESPONSE_TYPE_OAI_RESP) { + output = format_oai_resp_sse(res_json); } else { output = format_oai_sse(res_json); } @@ -3575,6 +3632,22 @@ void server_routes::init_routes() { TASK_RESPONSE_TYPE_OAI_CHAT); }; + this->post_responses_oai = [this](const server_http_req & req) { + auto res = create_response(); + std::vector files; + json body = convert_responses_to_chatcmpl(json::parse(req.body)); + json body_parsed = oaicompat_chat_params_parse( + body, + ctx_server.oai_parser_opt, + files); + return handle_completions_impl( + req, + SERVER_TASK_TYPE_COMPLETION, + body_parsed, + files, + TASK_RESPONSE_TYPE_OAI_RESP); + }; + this->post_anthropic_messages = [this](const server_http_req & req) { auto res = create_response(); std::vector files; diff --git a/tools/server/server-context.h b/tools/server/server-context.h index ec1df96950..3e5e870fc5 100644 --- a/tools/server/server-context.h +++ b/tools/server/server-context.h @@ -94,6 +94,7 @@ struct server_routes { server_http_context::handler_t post_completions; server_http_context::handler_t post_completions_oai; server_http_context::handler_t post_chat_completions; + server_http_context::handler_t post_responses_oai; server_http_context::handler_t post_anthropic_messages; 
server_http_context::handler_t post_anthropic_count_tokens; server_http_context::handler_t post_apply_template; diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index 2add9667d1..03f63f958d 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -584,6 +584,8 @@ json server_task_result_cmpl_final::to_json() { return to_json_oaicompat(); case TASK_RESPONSE_TYPE_OAI_CHAT: return stream ? to_json_oaicompat_chat_stream() : to_json_oaicompat_chat(); + case TASK_RESPONSE_TYPE_OAI_RESP: + return stream ? to_json_oaicompat_resp_stream() : to_json_oaicompat_resp(); case TASK_RESPONSE_TYPE_ANTHROPIC: return stream ? to_json_anthropic_stream() : to_json_anthropic(); default: @@ -801,6 +803,122 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() { return deltas; } +json server_task_result_cmpl_final::to_json_oaicompat_resp() { + common_chat_msg msg; + if (!oaicompat_msg.empty()) { + msg = oaicompat_msg; + } else { + msg.role = "assistant"; + msg.content = content; + } + + const json reasoning = { + {"type", "reasoning"}, + {"summary", json::array({json { + {"type", "summary_text"}, + {"text", msg.reasoning_content} + }})} + }; + const json message = { + {"type", "message"}, + {"status", "completed"}, + {"content", json::array({json { + {"type", "output_text"}, + {"annotations", json::array()}, + {"logprobs", json::array()}, + {"text", msg.content} + }})}, + {"role", msg.role} + }; + + std::time_t t = std::time(0); + json res = { + {"object", "response"}, + {"created_at", t}, + {"status", "completed"}, + {"model", oaicompat_model}, + {"output", json::array({reasoning, message})}, + {"usage", json { + {"input_tokens", n_prompt_tokens}, + {"output_tokens", n_decoded}, + {"total_tokens", n_decoded + n_prompt_tokens} + }}, + }; + + if (verbose) { + res["__verbose"] = to_json_non_oaicompat(); + } + if (timings.prompt_n >= 0) { + res.push_back({"timings", timings.to_json()}); + } + + return res; +} + +json 
server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { + json server_sent_events = json::array(); + + server_sent_events.push_back(json { + {"event", "response.output_text.done"}, + {"data", json { + {"type", "response.output_text.done"}, + {"text", oaicompat_msg.content} + }} + }); + + const json part = { + {"type", "output_text"}, + {"annotations", json::array()}, + {"logprobs", json::array()}, + {"text", oaicompat_msg.content} + }; + + server_sent_events.push_back(json { + {"event", "response.content_part.done"}, + {"data", json { + {"type", "response.content_part.done"}, + {"part", part} + }} + }); + + const json item = { + {"type", "message"}, + {"status", "completed"}, + {"content", json::array({part})}, + {"role", "assistant"} + }; + + server_sent_events.push_back(json { + {"event", "response.output_item.done"}, + {"data", json { + {"type", "response.output_item.done"}, + {"item", item} + }} + }); + + std::time_t t = std::time(0); + server_sent_events.push_back(json { + {"event", "response.completed"}, + {"data", json { + {"type", "response.completed"}, + {"response", json { + {"object", "response"}, + {"created_at", t}, + {"status", "completed"}, + {"model", oaicompat_model}, + {"output", json::array({item})}, + {"usage", json { + {"input_tokens", n_prompt_tokens}, + {"output_tokens", n_decoded}, + {"total_tokens", n_decoded + n_prompt_tokens} + }} + }}, + }} + }); + + return server_sent_events; +} + json server_task_result_cmpl_final::to_json_anthropic() { std::string stop_reason = "max_tokens"; if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) { @@ -1066,6 +1184,8 @@ json server_task_result_cmpl_partial::to_json() { return to_json_oaicompat(); case TASK_RESPONSE_TYPE_OAI_CHAT: return to_json_oaicompat_chat(); + case TASK_RESPONSE_TYPE_OAI_RESP: + return to_json_oaicompat_resp(); case TASK_RESPONSE_TYPE_ANTHROPIC: return to_json_anthropic(); default: @@ -1135,6 +1255,33 @@ json server_task_result_cmpl_partial::to_json_oaicompat() { return res; 
} +json server_task_result_cmpl_partial::to_json_oaicompat_resp() { + std::vector deltas; + + for (const common_chat_msg_diff & diff : oaicompat_msg_diffs) { + if (!diff.reasoning_content_delta.empty()) { + deltas.push_back(json { + {"event", "response.reasoning_text.delta"}, + {"data", json { + {"type", "response.reasoning_text.delta"}, + {"delta", diff.reasoning_content_delta} + }} + }); + } + if (!diff.content_delta.empty()) { + deltas.push_back(json { + {"event", "response.output_text.delta"}, + {"data", json { + {"type", "response.output_text.delta"}, + {"delta", diff.content_delta} + }} + }); + } + } + + return deltas; +} + json server_task_result_cmpl_partial::to_json_oaicompat_chat() { bool first = n_decoded == 1; std::time_t t = std::time(0); diff --git a/tools/server/server-task.h b/tools/server/server-task.h index 6835eef507..5c71bb6d21 100644 --- a/tools/server/server-task.h +++ b/tools/server/server-task.h @@ -33,6 +33,7 @@ enum task_response_type { TASK_RESPONSE_TYPE_NONE, // llama.cpp native format TASK_RESPONSE_TYPE_OAI_CHAT, TASK_RESPONSE_TYPE_OAI_CMPL, + TASK_RESPONSE_TYPE_OAI_RESP, TASK_RESPONSE_TYPE_OAI_EMBD, TASK_RESPONSE_TYPE_ANTHROPIC, }; @@ -371,6 +372,10 @@ struct server_task_result_cmpl_final : server_task_result { json to_json_oaicompat_chat_stream(); + json to_json_oaicompat_resp(); + + json to_json_oaicompat_resp_stream(); + json to_json_anthropic(); json to_json_anthropic_stream(); @@ -436,6 +441,8 @@ struct server_task_result_cmpl_partial : server_task_result { json to_json_oaicompat_chat(); + json to_json_oaicompat_resp(); + json to_json_anthropic(); }; diff --git a/tools/server/server.cpp b/tools/server/server.cpp index 1d9abf6055..d3d4316026 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -140,6 +140,7 @@ int main(int argc, char ** argv) { routes.post_completions = models_routes->proxy_post; routes.post_completions_oai = models_routes->proxy_post; routes.post_chat_completions = models_routes->proxy_post; + 
routes.post_responses_oai = models_routes->proxy_post; routes.post_anthropic_messages = models_routes->proxy_post; routes.post_anthropic_count_tokens = models_routes->proxy_post; routes.post_infill = models_routes->proxy_post; @@ -176,6 +177,7 @@ int main(int argc, char ** argv) { ctx_http.post("/chat/completions", ex_wrapper(routes.post_chat_completions)); ctx_http.post("/v1/chat/completions", ex_wrapper(routes.post_chat_completions)); ctx_http.post("/api/chat", ex_wrapper(routes.post_chat_completions)); // ollama specific endpoint + ctx_http.post("/v1/responses", ex_wrapper(routes.post_responses_oai)); ctx_http.post("/v1/messages", ex_wrapper(routes.post_anthropic_messages)); // anthropic messages API ctx_http.post("/v1/messages/count_tokens", ex_wrapper(routes.post_anthropic_count_tokens)); // anthropic token counting ctx_http.post("/infill", ex_wrapper(routes.post_infill)); diff --git a/tools/server/tests/requirements.txt b/tools/server/tests/requirements.txt index 4ea7f19f77..ca79d025ed 100644 --- a/tools/server/tests/requirements.txt +++ b/tools/server/tests/requirements.txt @@ -2,7 +2,7 @@ aiohttp~=3.9.3 pytest~=8.3.3 huggingface_hub>=0.34.0,<1.0 numpy~=1.26.4 -openai~=1.55.3 +openai~=2.14.0 prometheus-client~=0.20.0 requests~=2.32.3 wget~=3.2 diff --git a/tools/server/tests/unit/test_compat_oai_responses.py b/tools/server/tests/unit/test_compat_oai_responses.py new file mode 100644 index 0000000000..e168f4562d --- /dev/null +++ b/tools/server/tests/unit/test_compat_oai_responses.py @@ -0,0 +1,48 @@ +import pytest +from openai import OpenAI +from utils import * + +server: ServerProcess + +@pytest.fixture(autouse=True) +def create_server(): + global server + server = ServerPreset.tinyllama2() + +def test_responses_with_openai_library(): + global server + server.start() + client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}/v1") + res = client.responses.create( + model="gpt-4.1", + input=[ + {"role": "system", "content": 
"Book"}, + {"role": "user", "content": "What is the best book"}, + ], + max_output_tokens=8, + temperature=0.8, + ) + assert match_regex("(Suddenly)+", res.output_text) + +def test_responses_stream_with_openai_library(): + global server + server.start() + client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}/v1") + stream = client.responses.create( + model="gpt-4.1", + input=[ + {"role": "system", "content": "Book"}, + {"role": "user", "content": "What is the best book"}, + ], + max_output_tokens=8, + temperature=0.8, + stream=True, + ) + + gathered_text = '' + for r in stream: + if r.type == "response.output_text.delta": + gathered_text += r.delta + if r.type == "response.completed": + assert gathered_text == r.response.output_text + assert match_regex("(Suddenly)+", r.response.output_text) From c41a6d7dd30eaa8aca7e6e9039af83eb18510484 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Tue, 30 Dec 2025 05:44:41 +0000 Subject: [PATCH 02/23] Make instruction(system) as first message --- tools/server/server-common.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index a615760954..e1f28f35ed 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1080,23 +1080,23 @@ json convert_responses_to_chatcmpl(const json & body) { const json input_value = body.at("input"); json chatcmpl_messages = json::array(); - if (input_value.is_array()) { - chatcmpl_messages = input_value; - } else if (input_value.is_string()) { + const std::string instructions = json_value(body, "instructions", std::string()); + if (instructions != "") { chatcmpl_messages.push_back({ - {"role", "user"}, - {"content", input_value}, + {"role", "system"}, + {"content", instructions}, }); - } else { - std::invalid_argument("'input' must be a string or array of objects"); } - const std::string instructions = json_value(body, 
"instructions", std::string{}); - if (instructions != "") { + if (input_value.is_string()) { chatcmpl_messages.push_back({ - {"role", "system"}, - {"content", instructions}, + {"role", "user"}, + {"content", input_value}, }); + } else if (input_value.is_array()) { + chatcmpl_messages = input_value; + } else { + throw std::invalid_argument("'input' must be a string or array of objects"); } json chatcmpl_body = body; From aa2238ea55c028817241a345bbc7bdf923287832 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Tue, 30 Dec 2025 05:45:32 +0000 Subject: [PATCH 03/23] Convert [input_message] (text/image/file) --- tools/server/server-common.cpp | 70 +++++++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index e1f28f35ed..373e84e36a 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1094,7 +1094,75 @@ json convert_responses_to_chatcmpl(const json & body) { {"content", input_value}, }); } else if (input_value.is_array()) { - chatcmpl_messages = input_value; + for (const auto & input_message : input_value) { + if (!input_message.contains("content")) { + throw std::invalid_argument("'content' is required"); + } + const json content = input_message.at("content"); + + if (content.is_string()) { + chatcmpl_messages.push_back(input_message); + } else if (content.is_array()) { + json new_content = json::array(); + + for (const auto & input_item : content) { + const std::string type = json_value(input_item, "type", std::string()); + + if (type == "input_text") { + if (!input_item.contains("text")) { + throw std::invalid_argument("'Input text' requires 'text'"); + } + new_content.push_back({ + {"text", input_item.at("text")}, + {"type", "text"} + }); + } else if (type == "input_image") { + // While `detail` is marked as required, + // it has default value("auto") and can be omitted. 
+ + if (!input_item.contains("image_url")) { + throw std::invalid_argument("'image_url' is required"); + } + new_content.push_back({ + {"image_url", json {{"url", input_item.at("image_url")}}}, + {"type", "image_url"} + }); + } else if (type == "input_file") { + if (input_item.contains("file_url")) { + // chat completion API does not support file_url + throw std::invalid_argument("'file_url' is not supported"); + } + if (!input_item.contains("file_data") || !input_item.contains("filename")) { + throw std::invalid_argument("Both 'file_data' and 'filename' are required"); + } + new_content.push_back({ + {"file", json { + {"file_data", input_item.at("file_data")}, + {"filename", input_item.at("filename")}}}, + {"type", "file"} + }); + } else { + throw std::invalid_argument("'type' must be one of 'input_text', 'input_image', or 'input_file'"); + } + } + + json new_input_message = input_message; + new_input_message["content"] = new_content; + + chatcmpl_messages.push_back(new_input_message); + } else { + throw std::invalid_argument("'content' must be a string or array of objects"); + } + + const std::string role = json_value(input_message, "role", std::string()); + if (role != "user" && role != "assistant" && role != "system" && role != "developer") { + throw std::invalid_argument("'role' must be one of user, assistant, system, or developer"); + } + + if (input_message.contains("type") && input_message.at("type") != "message") { + throw std::invalid_argument("If 'type' is defined, it should be 'message'"); + } + } } else { throw std::invalid_argument("'input' must be a string or array of objects"); } From fd0a13bb7507c058132a4b972f525cd0a0f49b6f Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Tue, 30 Dec 2025 06:29:05 +0000 Subject: [PATCH 04/23] Rename convert_responses_to_chatcmpl(body) -> response_body --- tools/server/server-common.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/server/server-common.cpp 
b/tools/server/server-common.cpp index 373e84e36a..18d79a1fe0 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1069,18 +1069,18 @@ json oaicompat_chat_params_parse( return llama_params; } -json convert_responses_to_chatcmpl(const json & body) { - if (!body.contains("input")) { +json convert_responses_to_chatcmpl(const json & response_body) { + if (!response_body.contains("input")) { throw std::invalid_argument("'input' is required"); } - if (!json_value(body, "previous_response_id", std::string{}).empty()) { + if (!json_value(response_body, "previous_response_id", std::string{}).empty()) { throw std::invalid_argument("llama.cpp does not support 'previous_response_id'."); } - const json input_value = body.at("input"); + const json input_value = response_body.at("input"); json chatcmpl_messages = json::array(); - const std::string instructions = json_value(body, "instructions", std::string()); + const std::string instructions = json_value(response_body, "instructions", std::string()); if (instructions != "") { chatcmpl_messages.push_back({ {"role", "system"}, @@ -1167,13 +1167,13 @@ json convert_responses_to_chatcmpl(const json & body) { throw std::invalid_argument("'input' must be a string or array of objects"); } - json chatcmpl_body = body; + json chatcmpl_body = response_body; chatcmpl_body.erase("input"); chatcmpl_body["messages"] = chatcmpl_messages; - if (body.contains("max_output_tokens")) { + if (response_body.contains("max_output_tokens")) { chatcmpl_body.erase("max_output_tokens"); - chatcmpl_body["max_tokens"] = body["max_output_tokens"]; + chatcmpl_body["max_tokens"] = response_body["max_output_tokens"]; } return chatcmpl_body; From f4a87c01b885c57412ef92b66b028839e3d87dc0 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Thu, 1 Jan 2026 00:26:37 +0000 Subject: [PATCH 05/23] Initial tool call support --- tools/server/server-common.cpp | 168 ++++++++++++++++++++++++++++----- tools/server/server-task.cpp | 120 
++++++++++++++++------- 2 files changed, 228 insertions(+), 60 deletions(-) diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index 18d79a1fe0..0cb1b620a8 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1089,30 +1089,52 @@ json convert_responses_to_chatcmpl(const json & response_body) { } if (input_value.is_string()) { + // #responses_create-input-text_input chatcmpl_messages.push_back({ {"role", "user"}, {"content", input_value}, }); } else if (input_value.is_array()) { - for (const auto & input_message : input_value) { - if (!input_message.contains("content")) { - throw std::invalid_argument("'content' is required"); + // #responses_create-input-input_item_list + + const auto exists_and_is_array = [](const json & j, const char * key) -> bool { + return j.contains(key) && j.at(key).is_array(); + }; + const auto exists_and_is_string = [](const json & j, const char * key) -> bool { + return j.contains(key) && j.at(key).is_string(); + }; + + for (json item : input_value) { + if (exists_and_is_string(item, "content")) { + // #responses_create-input-input_item_list-input_message-content-text_input + // Only "Input message" contains item["content"]::string + // After converting item["content"]::string to item["content"]::array, + // we can treat "Input message" as sum of "Item-Input message" and "Item-Output message" + item["content"] = json::array({ + json { + {"text", item.at("content")}, + {"type", "input_text"} + } + }); } - const json content = input_message.at("content"); - if (content.is_string()) { - chatcmpl_messages.push_back(input_message); - } else if (content.is_array()) { - json new_content = json::array(); + if (exists_and_is_array(item, "content") && + exists_and_is_string(item, "role") && + (item.at("role") == "user" || + item.at("role") == "system" || + item.at("role") == "developer") + ) { + // #responses_create-input-input_item_list-item-input_message + json chatcmpl_content = 
json::array(); - for (const auto & input_item : content) { + for (const json & input_item : item.at("content")) { const std::string type = json_value(input_item, "type", std::string()); if (type == "input_text") { if (!input_item.contains("text")) { throw std::invalid_argument("'Input text' requires 'text'"); } - new_content.push_back({ + chatcmpl_content.push_back({ {"text", input_item.at("text")}, {"type", "text"} }); @@ -1123,7 +1145,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { if (!input_item.contains("image_url")) { throw std::invalid_argument("'image_url' is required"); } - new_content.push_back({ + chatcmpl_content.push_back({ {"image_url", json {{"url", input_item.at("image_url")}}}, {"type", "image_url"} }); @@ -1135,7 +1157,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { if (!input_item.contains("file_data") || !input_item.contains("filename")) { throw std::invalid_argument("Both 'file_data' and 'filename' are required"); } - new_content.push_back({ + chatcmpl_content.push_back({ {"file", json { {"file_data", input_item.at("file_data")}, {"filename", input_item.at("filename")}}}, @@ -1146,21 +1168,93 @@ json convert_responses_to_chatcmpl(const json & response_body) { } } - json new_input_message = input_message; - new_input_message["content"] = new_content; + if (item.contains("type")) { + item.erase("type"); + } + if (item.contains("status")) { + item.erase("status"); + } + item["content"] = chatcmpl_content; + + chatcmpl_messages.push_back(item); + } else if (exists_and_is_array(item, "content") && + exists_and_is_string(item, "role") && + item.at("role") == "assistant" && + exists_and_is_string(item, "status") && + (item.at("status") == "in_progress" || + item.at("status") == "completed" || + item.at("status") == "incomplete") && + exists_and_is_string(item, "type") && + item.at("type") == "message" + ) { + // #responses_create-input-input_item_list-item-output_message + json chatcmpl_content = 
json::array(); + + for (const auto & output_text : item.at("content")) { + const std::string type = json_value(output_text, "type", std::string()); + if (type != "output_text") { + throw std::invalid_argument("'type' must be 'output_text'"); + } + if (!exists_and_is_string(output_text, "text")) { + throw std::invalid_argument("'Output text' requires 'text'"); + } + // Ignore annotations and logprobs for now + chatcmpl_content.push_back({ + {"text", output_text.at("text")}, + {"type", "text"} + }); + } - chatcmpl_messages.push_back(new_input_message); + item.erase("status"); + item.erase("type"); + item["content"] = chatcmpl_content; + chatcmpl_messages.push_back(item); + } else if (exists_and_is_string(item, "arguments") && + exists_and_is_string(item, "call_id") && + exists_and_is_string(item, "name") && + exists_and_is_string(item, "type") && + item.at("type") == "function_call" + ) { + // #responses_create-input-input_item_list-item-function_tool_call + chatcmpl_messages.push_back(json { + {"role", "assistant"}, + {"tool_calls", json::array({ json { + {"function", json { + {"arguments", item.at("arguments")}, + {"name", item.at("name")} + }}, + {"id", item.at("call_id")}, + {"type", "function"} + }})}, + }); + } else if (exists_and_is_string(item, "call_id") && + (exists_and_is_string(item, "output") || exists_and_is_array(item, "output")) && + exists_and_is_string(item, "type") && + item.at("type") == "function_call_output" + ) { + // #responses_create-input-input_item_list-item-function_tool_call_output + if (item.at("output").is_string()) { + chatcmpl_messages.push_back(json { + {"content", item.at("output")}, + {"role", "tool"}, + {"tool_call_id", item.at("call_id")} + }); + } else { + json chatcmpl_outputs = item.at("output"); + for (json & chatcmpl_output : chatcmpl_outputs) { + if (!chatcmpl_output.contains("type") || chatcmpl_output.at("type") != "input_text") { + throw std::invalid_argument("Output of tool call should be 'Input text'"); + } + 
chatcmpl_output["type"] = "text"; + } + chatcmpl_messages.push_back(json { + {"content", chatcmpl_outputs}, + {"role", "tool"}, + {"tool_call_id", item.at("call_id")} + }); + } } else { - throw std::invalid_argument("'content' must be a string or array of objects"); - } - - const std::string role = json_value(input_message, "role", std::string()); - if (role != "user" && role != "assistant" && role != "system" && role != "developer") { - throw std::invalid_argument("'role' must be one of user, assistant, system, or developer"); - } - - if (input_message.contains("type") && input_message.at("type") != "message") { - throw std::invalid_argument("If 'type' is defined, it should be 'message'"); + throw std::invalid_argument("Cannot determine type of 'item'"); } } } else { @@ -1171,6 +1265,30 @@ json convert_responses_to_chatcmpl(const json & response_body) { chatcmpl_body.erase("input"); chatcmpl_body["messages"] = chatcmpl_messages; + if (response_body.contains("tools")) { + if (!response_body.at("tools").is_array()) { + throw std::invalid_argument("'tools' must be an array of objects"); + } + json chatcmpl_tools = json::array(); + for (json resp_tool : response_body.at("tools")) { + json chatcmpl_tool; + + if (json_value(resp_tool, "type", std::string()) != "function") { + throw std::invalid_argument("'type' of tool must be 'function'"); + } + resp_tool.erase("type"); + chatcmpl_tool["type"] = "function"; + + if (!resp_tool.contains("strict")) { + resp_tool["strict"] = true; + } + chatcmpl_tool["function"] = resp_tool; + chatcmpl_tools.push_back(chatcmpl_tool); + } + chatcmpl_body.erase("tools"); + chatcmpl_body["tools"] = chatcmpl_tools; + } + if (response_body.contains("max_output_tokens")) { chatcmpl_body.erase("max_output_tokens"); chatcmpl_body["max_tokens"] = response_body["max_output_tokens"]; diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index 03f63f958d..e78086e0aa 100644 --- a/tools/server/server-task.cpp +++ 
b/tools/server/server-task.cpp @@ -857,44 +857,69 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp() { json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { json server_sent_events = json::array(); + json output = json::array(); + + for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) { + server_sent_events.push_back(json { + {"event", "response.output_item.done"}, + {"data", json { + {"type", "response.output_item.done"}, + {"item", json { + {"type", "function_call"}, + {"status", "completed"}, + {"arguments", tool_call.arguments}, + {"call_id", "call_dummy_id"}, + {"name", tool_call.name} + }} + }} + }); + output.push_back({ + {"type", "function_call"}, + {"status", "completed"}, + {"arguments", tool_call.arguments}, + {"name", tool_call.name} + }); + } - server_sent_events.push_back(json { - {"event", "response.output_text.done"}, - {"data", json { - {"type", "response.output_text.done"}, - {"text", oaicompat_msg.content} - }} - }); - - const json part = { - {"type", "output_text"}, - {"annotations", json::array()}, - {"logprobs", json::array()}, - {"text", oaicompat_msg.content} - }; + if (oaicompat_msg.content != "") { + server_sent_events.push_back(json { + {"event", "response.output_text.done"}, + {"data", json { + {"type", "response.output_text.done"}, + {"text", oaicompat_msg.content} + }} + }); - server_sent_events.push_back(json { - {"event", "response.content_part.done"}, - {"data", json { - {"type", "response.content_part.done"}, - {"part", part} - }} - }); + const json part = { + {"type", "output_text"}, + {"annotations", json::array()}, + {"logprobs", json::array()}, + {"text", oaicompat_msg.content} + }; - const json item = { - {"type", "message"}, - {"status", "completed"}, - {"content", json::array({part})}, - {"role", "assistant"} - }; + server_sent_events.push_back(json { + {"event", "response.content_part.done"}, + {"data", json { + {"type", "response.content_part.done"}, + {"part", part} + }} + 
}); + const json item = { + {"type", "message"}, + {"status", "completed"}, + {"content", json::array({part})}, + {"role", "assistant"} + }; - server_sent_events.push_back(json { - {"event", "response.output_item.done"}, - {"data", json { - {"type", "response.output_item.done"}, - {"item", item} - }} - }); + server_sent_events.push_back(json { + {"event", "response.output_item.done"}, + {"data", json { + {"type", "response.output_item.done"}, + {"item", item} + }} + }); + output.push_back(item); + } std::time_t t = std::time(0); server_sent_events.push_back(json { @@ -902,11 +927,12 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { {"data", json { {"type", "response.completed"}, {"response", json { + {"id", "resp_dummy_id"}, {"object", "response"}, {"created_at", t}, {"status", "completed"}, {"model", oaicompat_model}, - {"output", json::array({item})}, + {"output", output}, {"usage", json { {"input_tokens", n_prompt_tokens}, {"output_tokens", n_decoded}, @@ -1268,6 +1294,30 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() { }} }); } + if (!diff.tool_call_delta.name.empty()) { + deltas.push_back(json { + {"event", "response.output_item.added"}, + {"data", json { + {"type", "response.output_item.added"}, + {"item", json { + {"arguments", ""}, + {"call_id", "call_id_dummy"}, + {"name", diff.tool_call_delta.name}, + {"type", "function_call"}, + {"status", "in_progress"} + }} + }} + }); + } + if (!diff.tool_call_delta.arguments.empty()) { + deltas.push_back(json { + {"event", "response.function_call_arguments.delta"}, + {"data", json { + {"type", "response.function_call_arguments.delta"}, + {"delta", diff.tool_call_delta.arguments} + }} + }); + } if (!diff.content_delta.empty()) { deltas.push_back(json { {"event", "response.output_text.delta"}, From 6e47dea6cb418289d3691f10fbcdd344af9947ec Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Fri, 9 Jan 2026 08:00:21 +0000 Subject: [PATCH 06/23] Erase instructions field from 
chatcmpl body --- tools/server/server-common.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index 0cb1b620a8..a2d1d03526 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1078,6 +1078,8 @@ json convert_responses_to_chatcmpl(const json & response_body) { } const json input_value = response_body.at("input"); + json chatcmpl_body = response_body; + chatcmpl_body.erase("input"); json chatcmpl_messages = json::array(); const std::string instructions = json_value(response_body, "instructions", std::string()); @@ -1086,6 +1088,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { {"role", "system"}, {"content", instructions}, }); + chatcmpl_body.erase("instructions"); } if (input_value.is_string()) { @@ -1261,8 +1264,6 @@ json convert_responses_to_chatcmpl(const json & response_body) { throw std::invalid_argument("'input' must be a string or array of objects"); } - json chatcmpl_body = response_body; - chatcmpl_body.erase("input"); chatcmpl_body["messages"] = chatcmpl_messages; if (response_body.contains("tools")) { From 313ea1e871e9307b92bb18869a82475d519ae064 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Fri, 16 Jan 2026 11:19:44 +0000 Subject: [PATCH 07/23] Feed reasoning texts to chat template --- tools/server/server-common.cpp | 49 +++++++++++++++++++++++++++++----- tools/server/server-task.cpp | 43 ++++++++++++++++++++--------- 2 files changed, 73 insertions(+), 19 deletions(-) diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index a2d1d03526..362c312d4c 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1080,7 +1080,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { const json input_value = response_body.at("input"); json chatcmpl_body = response_body; chatcmpl_body.erase("input"); - json chatcmpl_messages = json::array(); + std::vector 
chatcmpl_messages; const std::string instructions = json_value(response_body, "instructions", std::string()); if (instructions != "") { @@ -1183,10 +1183,11 @@ json convert_responses_to_chatcmpl(const json & response_body) { } else if (exists_and_is_array(item, "content") && exists_and_is_string(item, "role") && item.at("role") == "assistant" && - exists_and_is_string(item, "status") && - (item.at("status") == "in_progress" || - item.at("status") == "completed" || - item.at("status") == "incomplete") && + // exists_and_is_string(item, "status") && + // (item.at("status") == "in_progress" || + // item.at("status") == "completed" || + // item.at("status") == "incomplete") && + // item["status"] not sent by codex-cli exists_and_is_string(item, "type") && item.at("type") == "message" ) { @@ -1219,7 +1220,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { item.at("type") == "function_call" ) { // #responses_create-input-input_item_list-item-function_tool_call - chatcmpl_messages.push_back(json { + json msg = json { {"role", "assistant"}, {"tool_calls", json::array({ json { {"function", json { @@ -1229,7 +1230,14 @@ json convert_responses_to_chatcmpl(const json & response_body) { {"id", item.at("call_id")}, {"type", "function"} }})}, - }); + }; + + if (!chatcmpl_messages.empty() && chatcmpl_messages.back().contains("reasoning_content")) { + // Move reasoning content from dummy message to tool call message + msg["reasoning_content"] = chatcmpl_messages.back().at("reasoning_content"); + chatcmpl_messages.pop_back(); + } + chatcmpl_messages.push_back(msg); } else if (exists_and_is_string(item, "call_id") && (exists_and_is_string(item, "output") || exists_and_is_array(item, "output")) && exists_and_is_string(item, "type") && @@ -1256,6 +1264,19 @@ json convert_responses_to_chatcmpl(const json & response_body) { {"tool_call_id", item.at("call_id")} }); } + } else if (// exists_and_is_string(item, "id") && + // item["id"] not sent by codex-cli + 
exists_and_is_array(item, "summary") && + exists_and_is_string(item, "type") && + item.at("type") == "reasoning") { + // #responses_create-input-input_item_list-item-reasoning + + // Pack reasoning content in dummy message + chatcmpl_messages.push_back(json { + {"role", "assistant"}, + {"content", json::array()}, + {"reasoning_content", item.at("content")[0].at("text")} + }); } else { throw std::invalid_argument("Cannot determine type of 'item'"); } @@ -1264,6 +1285,20 @@ json convert_responses_to_chatcmpl(const json & response_body) { throw std::invalid_argument("'input' must be a string or array of objects"); } + // Remove unused dummy message + // (reasoning content not followed by tool calls) + chatcmpl_messages.erase(std::remove_if( + chatcmpl_messages.begin(), + chatcmpl_messages.end(), + [](const json & x){ return x.contains("role") && + x.at("role") == "assistant" && + x.contains("content") && + x.at("content") == json::array() && + x.contains("reasoning_content"); + }), + chatcmpl_messages.end() + ); + chatcmpl_body["messages"] = chatcmpl_messages; if (response_body.contains("tools")) { diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index e78086e0aa..b372187f13 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -859,26 +859,23 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { json server_sent_events = json::array(); json output = json::array(); - for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) { + if (oaicompat_msg.reasoning_content != "") { server_sent_events.push_back(json { {"event", "response.output_item.done"}, {"data", json { {"type", "response.output_item.done"}, {"item", json { - {"type", "function_call"}, - {"status", "completed"}, - {"arguments", tool_call.arguments}, - {"call_id", "call_dummy_id"}, - {"name", tool_call.name} + {"id", "rs_id(response.output_item.done)"}, + {"summary", json::array()}, + {"type", "reasoning"}, + {"content", 
json::array({json { + {"text", oaicompat_msg.reasoning_content}, + {"type", "reasoning_text"}, + }})}, + {"encrypted_content", ""}, }} }} }); - output.push_back({ - {"type", "function_call"}, - {"status", "completed"}, - {"arguments", tool_call.arguments}, - {"name", tool_call.name} - }); } if (oaicompat_msg.content != "") { @@ -921,6 +918,28 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { output.push_back(item); } + for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) { + server_sent_events.push_back(json { + {"event", "response.output_item.done"}, + {"data", json { + {"type", "response.output_item.done"}, + {"item", json { + {"type", "function_call"}, + {"status", "completed"}, + {"arguments", tool_call.arguments}, + {"call_id", "call_dummy_id"}, + {"name", tool_call.name} + }} + }} + }); + output.push_back({ + {"type", "function_call"}, + {"status", "completed"}, + {"arguments", tool_call.arguments}, + {"name", tool_call.name} + }); + } + std::time_t t = std::time(0); server_sent_events.push_back(json { {"event", "response.completed"}, From 7d7058bbcbc53c65d7e58706f64e782221cf16ca Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Fri, 16 Jan 2026 11:33:37 +0000 Subject: [PATCH 08/23] Use std::vector instead of opaque json array --- tools/server/server-common.cpp | 6 +++--- tools/server/server-task.cpp | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index 362c312d4c..5aff08f0c1 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1128,7 +1128,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { item.at("role") == "developer") ) { // #responses_create-input-input_item_list-item-input_message - json chatcmpl_content = json::array(); + std::vector chatcmpl_content; for (const json & input_item : item.at("content")) { const std::string type = json_value(input_item, "type", std::string()); 
@@ -1192,7 +1192,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { item.at("type") == "message" ) { // #responses_create-input-input_item_list-item-output_message - json chatcmpl_content = json::array(); + std::vector chatcmpl_content; for (const auto & output_text : item.at("content")) { const std::string type = json_value(output_text, "type", std::string()); @@ -1305,7 +1305,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { if (!response_body.at("tools").is_array()) { throw std::invalid_argument("'tools' must be an array of objects"); } - json chatcmpl_tools = json::array(); + std::vector chatcmpl_tools; for (json resp_tool : response_body.at("tools")) { json chatcmpl_tool; diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index b372187f13..47bd1d8a6e 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -856,8 +856,8 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp() { } json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { - json server_sent_events = json::array(); - json output = json::array(); + std::vector server_sent_events; + std::vector output; if (oaicompat_msg.reasoning_content != "") { server_sent_events.push_back(json { From e550290deda2586babade0d0b9d28fb07ba3aa06 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Sat, 17 Jan 2026 01:44:18 +0000 Subject: [PATCH 09/23] Make output_item.added events consistent --- tools/server/server-context.cpp | 52 +---------- tools/server/server-task.cpp | 53 +---------- tools/server/server-task.h | 157 +++++++++++++++++++++++++++++++- 3 files changed, 157 insertions(+), 105 deletions(-) diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index c84e36c4a6..3e3b230591 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -3074,57 +3074,7 @@ std::unique_ptr server_routes::handle_completions_impl( if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) { 
res->data = format_anthropic_sse(first_result_json); } else if (res_type == TASK_RESPONSE_TYPE_OAI_RESP) { - const json created = { - {"event", "response.created"}, - {"data", json { - {"type", "response.created"}, - {"response", json { - {"object", "response"}, - {"status", "in_progress"} - }} - }} - }; - const json in_progress = { - {"event", "response.in_progress"}, - {"data", json { - {"type", "response.in_progress"}, - {"response", json { - {"object", "response"}, - {"status", "in_progress"} - }} - }} - }; - const json output_item_added = { - {"event", "response.output_item.added"}, - {"data", json { - {"type", "response.output_item.added"}, - {"item", json { - {"type", "message"}, - {"status", "in_progress"}, - {"content", json::array()}, - {"role", "assistant"} - }} - }} - }; - const json content_part_added = { - {"event", "response.content_part.added"}, - {"data", json { - {"type", "response.content_part.added"}, - {"part", json { - {"type", "output_text"}, - {"text", ""} - }} - }} - }; - - const json initial_events = json::array({ - created, - in_progress, - output_item_added, - content_part_added - }); - - res->data = format_oai_resp_sse(initial_events) + format_oai_resp_sse(first_result_json); + res->data = format_oai_resp_sse(first_result_json); } else { res->data = format_oai_sse(first_result_json); } diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index 47bd1d8a6e..bf986cfe8c 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -1230,7 +1230,7 @@ json server_task_result_cmpl_partial::to_json() { case TASK_RESPONSE_TYPE_OAI_CHAT: return to_json_oaicompat_chat(); case TASK_RESPONSE_TYPE_OAI_RESP: - return to_json_oaicompat_resp(); + return openai_responses_current_events; case TASK_RESPONSE_TYPE_ANTHROPIC: return to_json_anthropic(); default: @@ -1300,57 +1300,6 @@ json server_task_result_cmpl_partial::to_json_oaicompat() { return res; } -json server_task_result_cmpl_partial::to_json_oaicompat_resp() { 
- std::vector deltas; - - for (const common_chat_msg_diff & diff : oaicompat_msg_diffs) { - if (!diff.reasoning_content_delta.empty()) { - deltas.push_back(json { - {"event", "response.reasoning_text.delta"}, - {"data", json { - {"type", "response.reasoning_text.delta"}, - {"delta", diff.reasoning_content_delta} - }} - }); - } - if (!diff.tool_call_delta.name.empty()) { - deltas.push_back(json { - {"event", "response.output_item.added"}, - {"data", json { - {"type", "response.output_item.added"}, - {"item", json { - {"arguments", ""}, - {"call_id", "call_id_dummy"}, - {"name", diff.tool_call_delta.name}, - {"type", "function_call"}, - {"status", "in_progress"} - }} - }} - }); - } - if (!diff.tool_call_delta.arguments.empty()) { - deltas.push_back(json { - {"event", "response.function_call_arguments.delta"}, - {"data", json { - {"type", "response.function_call_arguments.delta"}, - {"delta", diff.tool_call_delta.arguments} - }} - }); - } - if (!diff.content_delta.empty()) { - deltas.push_back(json { - {"event", "response.output_text.delta"}, - {"data", json { - {"type", "response.output_text.delta"}, - {"delta", diff.content_delta} - }} - }); - } - } - - return deltas; -} - json server_task_result_cmpl_partial::to_json_oaicompat_chat() { bool first = n_decoded == 1; std::time_t t = std::time(0); diff --git a/tools/server/server-task.h b/tools/server/server-task.h index 5c71bb6d21..396cb124d1 100644 --- a/tools/server/server-task.h +++ b/tools/server/server-task.h @@ -99,6 +99,10 @@ struct task_result_state { std::string generated_text; // append new chunks of generated text here std::vector generated_tool_call_ids; + // for OpenAI Responses API + // contains "resp_...", "rs_...", "fc_...", and "msg_..." 
generated during streaming + std::vector openai_responses_item_ids; + // for Anthropic API streaming: track content block state across chunks bool anthropic_thinking_block_started = false; bool anthropic_text_block_started = false; @@ -402,6 +406,9 @@ struct server_task_result_cmpl_partial : server_task_result { std::vector oaicompat_msg_diffs; // to be populated by update() bool is_updated = false; + // for OpenAI Responses API: Events emitted by current chunk + std::vector openai_responses_current_events; + // for Anthropic API: track if any reasoning content has been generated bool anthropic_has_reasoning = false; // Streaming state copied from task_result_state for this chunk @@ -417,6 +424,154 @@ struct server_task_result_cmpl_partial : server_task_result { virtual void update(task_result_state & state) override { is_updated = true; state.update_chat_msg(content, true, oaicompat_msg_diffs); + + if (res_type == TASK_RESPONSE_TYPE_OAI_RESP) { + if (state.openai_responses_item_ids.empty()) { + // Create response object + const std::string response_id = "resp_" + random_string(); + openai_responses_current_events.push_back(json { + {"event", "response.created"}, + {"data", json { + {"type", "response.created"}, + {"response", json { + {"id", response_id}, + {"object", "response"}, + {"status", "in_progress"}, + }}, + }}, + }); + openai_responses_current_events.push_back(json { + {"event", "response.in_progress"}, + {"data", json { + {"type", "response.in_progress"}, + {"response", json { + {"id", response_id}, + {"object", "response"}, + {"status", "in_progress"}, + }}, + }}, + }); + state.openai_responses_item_ids.push_back(response_id); + } + + for (const common_chat_msg_diff & diff : oaicompat_msg_diffs) { + if (!diff.reasoning_content_delta.empty()) { + std::string resoning_id; + const std::string prev_item_id = state.openai_responses_item_ids.back(); + if (string_starts_with(prev_item_id, "rs_")) { + resoning_id = state.openai_responses_item_ids.back(); + } 
else { + // Add new reasoning output_item + + GGML_ASSERT(string_starts_with(prev_item_id, "resp_")); + // Reasoning item should be generated right after the reposonse object is created + + resoning_id = "rs_" + random_string(); + openai_responses_current_events.push_back(json { + {"event", "response.output_item.added"}, + {"data", json { + {"type", "response.output_item.added"}, + {"item", json { + {"id", resoning_id}, + {"summary", json::array()}, + {"type", "reasoning"}, + {"content", json::array()}, + {"encrypted_content", ""}, + {"status", "in_progress"}, + }}, + }}, + }); + state.openai_responses_item_ids.push_back(resoning_id); + } + openai_responses_current_events.push_back(json { + {"event", "response.reasoning_text.delta"}, + {"data", json { + {"delta", diff.reasoning_content_delta}, + {"item_id", resoning_id}, + {"type", "response.reasoning_text.delta"}, + }}, + }); + } + if (!diff.tool_call_delta.name.empty()) { + // Add new function call output_item + + const std::string function_call_id = "fc_" + state.generated_tool_call_ids.back(); + state.generated_tool_call_ids.back() = function_call_id; + openai_responses_current_events.push_back(json { + {"event", "response.output_item.added"}, + {"data", json { + {"type", "response.output_item.added"}, + {"item", json { + {"arguments", ""}, + {"call_id", function_call_id}, + {"name", diff.tool_call_delta.name}, + {"type", "function_call"}, + {"status", "in_progress"}, + }}, + }}, + }); + state.openai_responses_item_ids.push_back(function_call_id); + } + if (!diff.tool_call_delta.arguments.empty()) { + const std::string prev_item_id = state.openai_responses_item_ids.back(); + GGML_ASSERT(string_starts_with(prev_item_id, "fc_")); + + openai_responses_current_events.push_back(json { + {"event", "response.function_call_arguments.delta"}, + {"data", json { + {"delta", diff.tool_call_delta.arguments}, + {"item_id", prev_item_id}, + {"type", "response.function_call_arguments.delta"}, + }}, + }); + } + if 
(!diff.content_delta.empty()) { + std::string message_id; + if (string_starts_with(state.openai_responses_item_ids.back(), "msg_")) { + message_id = state.openai_responses_item_ids.back(); + } else { + message_id = "msg_" + random_string(); + openai_responses_current_events.push_back(json { + {"event", "response.output_item.added"}, + {"data", json { + {"type", "response.output_item.added"}, + {"item", json { + {"content", json::array()}, + {"id", message_id}, + {"role", "assistant"}, + {"status", "in_progress"}, + {"type", "message"}, + }}, + }}, + }); + openai_responses_current_events.push_back(json { + {"event", "response.content_part.added"}, + {"data", json { + {"type", "response.content_part.added"}, + {"item_id", message_id}, + {"part", json { + {"type", "output_text"}, + {"text", ""}, + }}, + }}, + }); + state.openai_responses_item_ids.push_back(message_id); + } + openai_responses_current_events.push_back(json { + {"event", "response.output_text.delta"}, + {"data", json { + {"type", "response.output_text.delta"}, + {"item_id", message_id}, + {"delta", diff.content_delta}, + }}, + }); + } + } + + return; + } + + // track if the accumulated message has any reasoning content anthropic_has_reasoning = !state.chat_msg.reasoning_content.empty(); @@ -441,8 +596,6 @@ struct server_task_result_cmpl_partial : server_task_result { json to_json_oaicompat_chat(); - json to_json_oaicompat_resp(); - json to_json_anthropic(); }; From 97e649e8f600bf7077baad93646113b40949e91d Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Sun, 18 Jan 2026 10:28:35 +0000 Subject: [PATCH 10/23] Move `server_task_result_cmpl_partial::update` from header to source --- tools/server/server-task.cpp | 168 ++++++++++++++++++++++++++++++++++ tools/server/server-task.h | 171 +---------------------------------- 2 files changed, 170 insertions(+), 169 deletions(-) diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index bf986cfe8c..baada85e3e 100644 --- 
a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -1220,6 +1220,174 @@ json server_task_result_cmpl_final::to_json_anthropic_stream() { // // server_task_result_cmpl_partial // +void server_task_result_cmpl_partial::update(task_result_state & state) { + is_updated = true; + state.update_chat_msg(content, true, oaicompat_msg_diffs); + + if (res_type == TASK_RESPONSE_TYPE_OAI_RESP) { + if (state.openai_responses_item_ids.empty()) { + // Create response object + const std::string response_id = "resp_" + random_string(); + openai_responses_current_events.push_back(json { + {"event", "response.created"}, + {"data", json { + {"type", "response.created"}, + {"response", json { + {"id", response_id}, + {"object", "response"}, + {"status", "in_progress"}, + }}, + }}, + }); + openai_responses_current_events.push_back(json { + {"event", "response.in_progress"}, + {"data", json { + {"type", "response.in_progress"}, + {"response", json { + {"id", response_id}, + {"object", "response"}, + {"status", "in_progress"}, + }}, + }}, + }); + state.openai_responses_item_ids.push_back(response_id); + } + + for (const common_chat_msg_diff & diff : oaicompat_msg_diffs) { + if (!diff.reasoning_content_delta.empty()) { + std::string resoning_id; + const std::string prev_item_id = state.openai_responses_item_ids.back(); + if (string_starts_with(prev_item_id, "rs_")) { + resoning_id = state.openai_responses_item_ids.back(); + } else { + // Add new reasoning output_item + + GGML_ASSERT(string_starts_with(prev_item_id, "resp_")); + // Reasoning item should be generated right after the reposonse object is created + + resoning_id = "rs_" + random_string(); + openai_responses_current_events.push_back(json { + {"event", "response.output_item.added"}, + {"data", json { + {"type", "response.output_item.added"}, + {"item", json { + {"id", resoning_id}, + {"summary", json::array()}, + {"type", "reasoning"}, + {"content", json::array()}, + {"encrypted_content", ""}, + {"status", 
"in_progress"}, + }}, + }}, + }); + state.openai_responses_item_ids.push_back(resoning_id); + } + openai_responses_current_events.push_back(json { + {"event", "response.reasoning_text.delta"}, + {"data", json { + {"delta", diff.reasoning_content_delta}, + {"item_id", resoning_id}, + {"type", "response.reasoning_text.delta"}, + }}, + }); + } + if (!diff.content_delta.empty()) { + std::string message_id; + if (string_starts_with(state.openai_responses_item_ids.back(), "msg_")) { + message_id = state.openai_responses_item_ids.back(); + } else { + message_id = "msg_" + random_string(); + openai_responses_current_events.push_back(json { + {"event", "response.output_item.added"}, + {"data", json { + {"type", "response.output_item.added"}, + {"item", json { + {"content", json::array()}, + {"id", message_id}, + {"role", "assistant"}, + {"status", "in_progress"}, + {"type", "message"}, + }}, + }}, + }); + openai_responses_current_events.push_back(json { + {"event", "response.content_part.added"}, + {"data", json { + {"type", "response.content_part.added"}, + {"item_id", message_id}, + {"part", json { + {"type", "output_text"}, + {"text", ""}, + }}, + }}, + }); + state.openai_responses_item_ids.push_back(message_id); + } + openai_responses_current_events.push_back(json { + {"event", "response.output_text.delta"}, + {"data", json { + {"type", "response.output_text.delta"}, + {"item_id", message_id}, + {"delta", diff.content_delta}, + }}, + }); + } + if (!diff.tool_call_delta.name.empty()) { + // Add new function call output_item + + const std::string function_call_id = "fc_" + state.generated_tool_call_ids.back(); + state.generated_tool_call_ids.back() = function_call_id; + openai_responses_current_events.push_back(json { + {"event", "response.output_item.added"}, + {"data", json { + {"type", "response.output_item.added"}, + {"item", json { + {"arguments", ""}, + {"call_id", function_call_id}, + {"name", diff.tool_call_delta.name}, + {"type", "function_call"}, + {"status", 
"in_progress"}, + }}, + }}, + }); + state.openai_responses_item_ids.push_back(function_call_id); + } + if (!diff.tool_call_delta.arguments.empty()) { + const std::string prev_item_id = state.openai_responses_item_ids.back(); + GGML_ASSERT(string_starts_with(prev_item_id, "fc_")); + + openai_responses_current_events.push_back(json { + {"event", "response.function_call_arguments.delta"}, + {"data", json { + {"delta", diff.tool_call_delta.arguments}, + {"item_id", prev_item_id}, + {"type", "response.function_call_arguments.delta"}, + }}, + }); + } + } + + return; + } + + // track if the accumulated message has any reasoning content + anthropic_has_reasoning = !state.chat_msg.reasoning_content.empty(); + + // Copy current state for use in to_json_anthropic() (reflects state BEFORE this chunk) + anthropic_thinking_block_started = state.anthropic_thinking_block_started; + anthropic_text_block_started = state.anthropic_text_block_started; + + // Pre-compute state updates based on diffs (for next chunk) + for (const auto & diff : oaicompat_msg_diffs) { + if (!diff.reasoning_content_delta.empty() && !state.anthropic_thinking_block_started) { + state.anthropic_thinking_block_started = true; + } + if (!diff.content_delta.empty() && !state.anthropic_text_block_started) { + state.anthropic_text_block_started = true; + } + } +} + json server_task_result_cmpl_partial::to_json() { GGML_ASSERT(is_updated && "update() must be called before to_json()"); switch (res_type) { diff --git a/tools/server/server-task.h b/tools/server/server-task.h index 396cb124d1..debc4f4bbf 100644 --- a/tools/server/server-task.h +++ b/tools/server/server-task.h @@ -419,176 +419,9 @@ struct server_task_result_cmpl_partial : server_task_result { return false; // in stream mode, partial responses are not considered stop } - virtual json to_json() override; - - virtual void update(task_result_state & state) override { - is_updated = true; - state.update_chat_msg(content, true, oaicompat_msg_diffs); - - if 
(res_type == TASK_RESPONSE_TYPE_OAI_RESP) { - if (state.openai_responses_item_ids.empty()) { - // Create response object - const std::string response_id = "resp_" + random_string(); - openai_responses_current_events.push_back(json { - {"event", "response.created"}, - {"data", json { - {"type", "response.created"}, - {"response", json { - {"id", response_id}, - {"object", "response"}, - {"status", "in_progress"}, - }}, - }}, - }); - openai_responses_current_events.push_back(json { - {"event", "response.in_progress"}, - {"data", json { - {"type", "response.in_progress"}, - {"response", json { - {"id", response_id}, - {"object", "response"}, - {"status", "in_progress"}, - }}, - }}, - }); - state.openai_responses_item_ids.push_back(response_id); - } - - for (const common_chat_msg_diff & diff : oaicompat_msg_diffs) { - if (!diff.reasoning_content_delta.empty()) { - std::string resoning_id; - const std::string prev_item_id = state.openai_responses_item_ids.back(); - if (string_starts_with(prev_item_id, "rs_")) { - resoning_id = state.openai_responses_item_ids.back(); - } else { - // Add new reasoning output_item - - GGML_ASSERT(string_starts_with(prev_item_id, "resp_")); - // Reasoning item should be generated right after the reposonse object is created - - resoning_id = "rs_" + random_string(); - openai_responses_current_events.push_back(json { - {"event", "response.output_item.added"}, - {"data", json { - {"type", "response.output_item.added"}, - {"item", json { - {"id", resoning_id}, - {"summary", json::array()}, - {"type", "reasoning"}, - {"content", json::array()}, - {"encrypted_content", ""}, - {"status", "in_progress"}, - }}, - }}, - }); - state.openai_responses_item_ids.push_back(resoning_id); - } - openai_responses_current_events.push_back(json { - {"event", "response.reasoning_text.delta"}, - {"data", json { - {"delta", diff.reasoning_content_delta}, - {"item_id", resoning_id}, - {"type", "response.reasoning_text.delta"}, - }}, - }); - } - if 
(!diff.tool_call_delta.name.empty()) { - // Add new function call output_item - - const std::string function_call_id = "fc_" + state.generated_tool_call_ids.back(); - state.generated_tool_call_ids.back() = function_call_id; - openai_responses_current_events.push_back(json { - {"event", "response.output_item.added"}, - {"data", json { - {"type", "response.output_item.added"}, - {"item", json { - {"arguments", ""}, - {"call_id", function_call_id}, - {"name", diff.tool_call_delta.name}, - {"type", "function_call"}, - {"status", "in_progress"}, - }}, - }}, - }); - state.openai_responses_item_ids.push_back(function_call_id); - } - if (!diff.tool_call_delta.arguments.empty()) { - const std::string prev_item_id = state.openai_responses_item_ids.back(); - GGML_ASSERT(string_starts_with(prev_item_id, "fc_")); - - openai_responses_current_events.push_back(json { - {"event", "response.function_call_arguments.delta"}, - {"data", json { - {"delta", diff.tool_call_delta.arguments}, - {"item_id", prev_item_id}, - {"type", "response.function_call_arguments.delta"}, - }}, - }); - } - if (!diff.content_delta.empty()) { - std::string message_id; - if (string_starts_with(state.openai_responses_item_ids.back(), "msg_")) { - message_id = state.openai_responses_item_ids.back(); - } else { - message_id = "msg_" + random_string(); - openai_responses_current_events.push_back(json { - {"event", "response.output_item.added"}, - {"data", json { - {"type", "response.output_item.added"}, - {"item", json { - {"content", json::array()}, - {"id", message_id}, - {"role", "assistant"}, - {"status", "in_progress"}, - {"type", "message"}, - }}, - }}, - }); - openai_responses_current_events.push_back(json { - {"event", "response.content_part.added"}, - {"data", json { - {"type", "response.content_part.added"}, - {"item_id", message_id}, - {"part", json { - {"type", "output_text"}, - {"text", ""}, - }}, - }}, - }); - state.openai_responses_item_ids.push_back(message_id); - } - 
openai_responses_current_events.push_back(json { - {"event", "response.output_text.delta"}, - {"data", json { - {"type", "response.output_text.delta"}, - {"item_id", message_id}, - {"delta", diff.content_delta}, - }}, - }); - } - } - - return; - } + virtual void update(task_result_state & state) override; - - // track if the accumulated message has any reasoning content - anthropic_has_reasoning = !state.chat_msg.reasoning_content.empty(); - - // Copy current state for use in to_json_anthropic() (reflects state BEFORE this chunk) - anthropic_thinking_block_started = state.anthropic_thinking_block_started; - anthropic_text_block_started = state.anthropic_text_block_started; - - // Pre-compute state updates based on diffs (for next chunk) - for (const auto & diff : oaicompat_msg_diffs) { - if (!diff.reasoning_content_delta.empty() && !state.anthropic_thinking_block_started) { - state.anthropic_thinking_block_started = true; - } - if (!diff.content_delta.empty() && !state.anthropic_text_block_started) { - state.anthropic_text_block_started = true; - } - } - } + virtual json to_json() override; json to_json_non_oaicompat(); From d9dca02943a1e37f003a93dc4e63d213c2f3f8d6 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Sun, 18 Jan 2026 11:28:24 +0000 Subject: [PATCH 11/23] Match ID of output_item.added and .done events --- tools/server/server-task.cpp | 83 ++++++++++++++++++++++-------------- tools/server/server-task.h | 4 ++ 2 files changed, 54 insertions(+), 33 deletions(-) diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index baada85e3e..691275a61f 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -860,34 +860,53 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { std::vector output; if (oaicompat_msg.reasoning_content != "") { + const auto reasoning_id_it = std::find_if( + openai_responses_item_ids.begin(), + openai_responses_item_ids.end(), + [](const std::string & id){ return 
string_starts_with(id, "rs_"); } + ); + GGML_ASSERT(reasoning_id_it != openai_responses_item_ids.end()); + const std::string reasoning_id = *reasoning_id_it; + + const json output_item = json { + {"id", reasoning_id}, + {"summary", json::array()}, + {"type", "reasoning"}, + {"content", json::array({json { + {"text", oaicompat_msg.reasoning_content}, + {"type", "reasoning_text"}, + }})}, + {"encrypted_content", ""}, + }; + server_sent_events.push_back(json { {"event", "response.output_item.done"}, {"data", json { {"type", "response.output_item.done"}, - {"item", json { - {"id", "rs_id(response.output_item.done)"}, - {"summary", json::array()}, - {"type", "reasoning"}, - {"content", json::array({json { - {"text", oaicompat_msg.reasoning_content}, - {"type", "reasoning_text"}, - }})}, - {"encrypted_content", ""}, - }} + {"item", output_item} }} }); + output.push_back(output_item); } if (oaicompat_msg.content != "") { + const auto message_id_it = std::find_if( + openai_responses_item_ids.begin(), + openai_responses_item_ids.end(), + [](const std::string & id){ return string_starts_with(id, "msg_"); } + ); + GGML_ASSERT(message_id_it != openai_responses_item_ids.end()); + const std::string message_id = *message_id_it; server_sent_events.push_back(json { {"event", "response.output_text.done"}, {"data", json { - {"type", "response.output_text.done"}, - {"text", oaicompat_msg.content} + {"type", "response.output_text.done"}, + {"item_id", message_id}, + {"text", oaicompat_msg.content} }} }); - const json part = { + const json content_part = { {"type", "output_text"}, {"annotations", json::array()}, {"logprobs", json::array()}, @@ -897,14 +916,16 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { server_sent_events.push_back(json { {"event", "response.content_part.done"}, {"data", json { - {"type", "response.content_part.done"}, - {"part", part} + {"type", "response.content_part.done"}, + {"item_id", message_id}, + {"part", content_part} }} }); - const 
json item = { + const json output_item = { {"type", "message"}, {"status", "completed"}, - {"content", json::array({part})}, + {"id", message_id}, + {"content", json::array({content_part})}, {"role", "assistant"} }; @@ -912,32 +933,28 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { {"event", "response.output_item.done"}, {"data", json { {"type", "response.output_item.done"}, - {"item", item} + {"item", output_item} }} }); - output.push_back(item); + output.push_back(output_item); } for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) { + const json output_item = { + {"type", "function_call"}, + {"status", "completed"}, + {"arguments", tool_call.arguments}, + {"call_id", tool_call.id}, + {"name", tool_call.name} + }; server_sent_events.push_back(json { {"event", "response.output_item.done"}, {"data", json { {"type", "response.output_item.done"}, - {"item", json { - {"type", "function_call"}, - {"status", "completed"}, - {"arguments", tool_call.arguments}, - {"call_id", "call_dummy_id"}, - {"name", tool_call.name} - }} + {"item", output_item} }} }); - output.push_back({ - {"type", "function_call"}, - {"status", "completed"}, - {"arguments", tool_call.arguments}, - {"name", tool_call.name} - }); + output.push_back(output_item); } std::time_t t = std::time(0); @@ -946,7 +963,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { {"data", json { {"type", "response.completed"}, {"response", json { - {"id", "resp_dummy_id"}, + {"id", openai_responses_item_ids[0]}, {"object", "response"}, {"created_at", t}, {"status", "completed"}, diff --git a/tools/server/server-task.h b/tools/server/server-task.h index debc4f4bbf..2a44b0824b 100644 --- a/tools/server/server-task.h +++ b/tools/server/server-task.h @@ -357,6 +357,9 @@ struct server_task_result_cmpl_final : server_task_result { std::vector oaicompat_msg_diffs; // to be populated by update() bool is_updated = false; + // to be copied from task_result_state by 
update() + std::vector openai_responses_item_ids; + virtual bool is_stop() override { return true; // in stream mode, final responses are considered stop } @@ -366,6 +369,7 @@ struct server_task_result_cmpl_final : server_task_result { virtual void update(task_result_state & state) override { is_updated = true; oaicompat_msg = state.update_chat_msg(content, false, oaicompat_msg_diffs); + openai_responses_item_ids = state.openai_responses_item_ids; } json to_json_non_oaicompat(); From cd9b4cfada55d06b384781fa30cc65dc7bc10dc9 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Sun, 18 Jan 2026 12:49:47 +0000 Subject: [PATCH 12/23] Add function_call only if there is no "fc_" prefix --- tools/server/server-task.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index 691275a61f..4998a0980f 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -1349,8 +1349,10 @@ void server_task_result_cmpl_partial::update(task_result_state & state) { }}, }); } - if (!diff.tool_call_delta.name.empty()) { + if (!diff.tool_call_delta.name.empty() && + !string_starts_with(state.generated_tool_call_ids.back(), "fc_")) { // Add new function call output_item + // This fails to detect new item if there are >1 consecutive function calls const std::string function_call_id = "fc_" + state.generated_tool_call_ids.back(); state.generated_tool_call_ids.back() = function_call_id; From 6c200df3b3894b16faf3f26131504c9a5063d72d Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Mon, 19 Jan 2026 02:52:55 +0000 Subject: [PATCH 13/23] Add function call output at non-streaming API --- tools/server/server-task.cpp | 83 ++++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 33 deletions(-) diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index 4998a0980f..459129ecec 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -812,46 +812,63 @@ 
json server_task_result_cmpl_final::to_json_oaicompat_resp() { msg.content = content; } - const json reasoning = { - {"type", "reasoning"}, - {"summary", json::array({json { - {"type", "summary_text"}, - {"text", msg.reasoning_content} - }})} - }; - const json message = { - {"type", "message"}, - {"status", "completed"}, - {"content", json::array({json { - {"type", "output_text"}, - {"annotations", json::array()}, - {"logprobs", json::array()}, - {"text", msg.content} - }})}, - {"role", msg.role} - }; + std::vector output; + + if (msg.reasoning_content != "") { + output.push_back(json { + {"id", "rs_" + random_string()}, + {"summary", json::array()}, + {"type", "reasoning"}, + {"content", json::array({ json { + {"text", msg.reasoning_content}, + {"type", "reasoning_text"}, + }})}, + {"encrypted_content", ""}, + {"status", "completed"}, + }); + } + + if (msg.content != "") { + output.push_back(json { + {"content", json::array({ json { + {"type", "output_text"}, + {"annotations", json::array()}, + {"logprobs", json::array()}, + {"text", msg.content}, + }})}, + {"id", "msg_" + random_string()}, + {"role", msg.role}, + {"status", "completed"}, + {"type", "message"}, + }); + } + + for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) { + output.push_back(json { + {"type", "function_call"}, + {"status", "completed"}, + {"arguments", tool_call.arguments}, + {"call_id", tool_call.id}, + {"name", tool_call.name}, + }); + } std::time_t t = std::time(0); json res = { - {"object", "response"}, - {"created_at", t}, - {"status", "completed"}, - {"model", oaicompat_model}, - {"output", json::array({reasoning, message})}, - {"usage", json { + {"completed_at", t}, + {"created_at", t}, + {"id", "resp_" + random_string()}, + {"model", oaicompat_model}, + {"object", "response"}, + {"output", output}, + {"status", "completed"}, + {"usage", json { {"input_tokens", n_prompt_tokens}, {"output_tokens", n_decoded}, - {"total_tokens", n_decoded + n_prompt_tokens} + 
{"total_tokens", n_decoded + n_prompt_tokens}, }}, }; - if (verbose) { - res["__verbose"] = to_json_non_oaicompat(); - } - if (timings.prompt_n >= 0) { - res.push_back({"timings", timings.to_json()}); - } - return res; } @@ -872,7 +889,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { {"id", reasoning_id}, {"summary", json::array()}, {"type", "reasoning"}, - {"content", json::array({json { + {"content", json::array({ json { {"text", oaicompat_msg.reasoning_content}, {"type", "reasoning_text"}, }})}, From 63c60135ab1b23bd3320ba482fc1d2cb2271c76b Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Mon, 19 Jan 2026 04:42:08 +0000 Subject: [PATCH 14/23] Test if ID is persistent --- .../tests/unit/test_compat_oai_responses.py | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tools/server/tests/unit/test_compat_oai_responses.py b/tools/server/tests/unit/test_compat_oai_responses.py index e168f4562d..7aab4a8ba6 100644 --- a/tools/server/tests/unit/test_compat_oai_responses.py +++ b/tools/server/tests/unit/test_compat_oai_responses.py @@ -22,6 +22,9 @@ def test_responses_with_openai_library(): max_output_tokens=8, temperature=0.8, ) + assert res.id.startswith("resp_") + assert res.output[0].id is not None + assert res.output[0].id.startswith("msg_") assert match_regex("(Suddenly)+", res.output_text) def test_responses_stream_with_openai_library(): @@ -40,9 +43,31 @@ def test_responses_stream_with_openai_library(): ) gathered_text = '' + resp_id = '' + msg_id = '' for r in stream: + if r.type == "response.created": + assert r.response.id.startswith("resp_") + resp_id = r.response.id + if r.type == "response.in_progress": + assert r.response.id == resp_id + if r.type == "response.output_item.added": + assert r.item.id is not None + assert r.item.id.startswith("msg_") + msg_id = r.item.id + if (r.type == "response.content_part.added" or + r.type == "response.output_text.delta" or + r.type == "response.output_text.done" or + r.type 
== "response.content_part.done"): + assert r.item_id == msg_id + if r.type == "response.output_item.done": + assert r.item.id == msg_id + if r.type == "response.output_text.delta": gathered_text += r.delta if r.type == "response.completed": + assert r.response.id.startswith("resp_") + assert r.response.output[0].id is not None + assert r.response.output[0].id.startswith("msg_") assert gathered_text == r.response.output_text assert match_regex("(Suddenly)+", r.response.output_text) From f232a1b9bc8f23020b5e298ef4b8340e2349d030 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Mon, 19 Jan 2026 05:12:26 +0000 Subject: [PATCH 15/23] Add doc --- tools/server/README.md | 45 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/tools/server/README.md b/tools/server/README.md index 9fe8938768..191391a882 100644 --- a/tools/server/README.md +++ b/tools/server/README.md @@ -6,7 +6,7 @@ Set of LLM REST APIs and a web UI to interact with llama.cpp. **Features:** * LLM inference of F16 and quantized models on GPU and CPU - * [OpenAI API](https://github.com/openai/openai-openapi) compatible chat completions and embeddings routes + * [OpenAI API](https://github.com/openai/openai-openapi) compatible chat completions, responses, and embeddings routes * [Anthropic Messages API](https://docs.anthropic.com/en/api/messages) compatible chat completions * Reranking endpoint (https://github.com/ggml-org/llama.cpp/pull/9510) * Parallel decoding with multi-user support @@ -1267,6 +1267,49 @@ This provides information on the performance of the server. It also allows calcu The total number of tokens in context is equal to `prompt_n + cache_n + predicted_n` +### POST `/v1/responses`: OpenAI-compatible Responses API + +*Options:* + +See [OpenAI Responses API documentation](https://platform.openai.com/docs/api-reference/responses). 
+
+*Examples:*
+
+You can use either the Python `openai` library with appropriate checkpoints:
+
+```python
+import openai
+
+client = openai.OpenAI(
+    base_url="http://localhost:8080/v1", # "http://:port"
+    api_key = "sk-no-key-required"
+)
+
+response = client.responses.create(
+    model="gpt-4.1",
+    instructions="You are ChatGPT, an AI assistant. Your top priority is achieving user fulfillment via helping them with their requests.",
+    input="Write a limerick about python exceptions"
+)
+
+print(response.output_text)
+```
+
+... or raw HTTP requests:
+
+```shell
+curl http://localhost:8080/v1/responses \
+-H "Content-Type: application/json" \
+-H "Authorization: Bearer no-key" \
+-d '{
+"model": "gpt-4.1",
+"instructions": "You are ChatGPT, an AI assistant. Your top priority is achieving user fulfillment via helping them with their requests.",
+"input": "Write a limerick about python exceptions"
+}'
+```
+
+This endpoint works by converting a Responses request into a Chat Completions request.
+
+
 ### POST `/v1/embeddings`: OpenAI-compatible embeddings API
 
 This endpoint requires that the model uses a pooling different than type `none`. The embeddings are normalized using the Eucledian norm.
From 8a2dd2d5af9e3aca6910758abdf5e3dd3ded5463 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Mon, 19 Jan 2026 05:35:44 +0000 Subject: [PATCH 16/23] Fix style - use trailing comma --- tools/server/server-common.cpp | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index 5aff08f0c1..f60cb3e285 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1139,7 +1139,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { } chatcmpl_content.push_back({ {"text", input_item.at("text")}, - {"type", "text"} + {"type", "text"}, }); } else if (type == "input_image") { // While `detail` is marked as required, @@ -1149,8 +1149,10 @@ json convert_responses_to_chatcmpl(const json & response_body) { throw std::invalid_argument("'image_url' is required"); } chatcmpl_content.push_back({ - {"image_url", json {{"url", input_item.at("image_url")}}}, - {"type", "image_url"} + {"image_url", json { + {"url", input_item.at("image_url")} + }}, + {"type", "image_url"}, }); } else if (type == "input_file") { if (input_item.contains("file_url")) { @@ -1163,8 +1165,9 @@ json convert_responses_to_chatcmpl(const json & response_body) { chatcmpl_content.push_back({ {"file", json { {"file_data", input_item.at("file_data")}, - {"filename", input_item.at("filename")}}}, - {"type", "file"} + {"filename", input_item.at("filename")}, + }}, + {"type", "file"}, }); } else { throw std::invalid_argument("'type' must be one of 'input_text', 'input_image', or 'input_file'"); @@ -1205,7 +1208,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { // Ignore annotations and logprobs for now chatcmpl_content.push_back({ {"text", output_text.at("text")}, - {"type", "text"} + {"type", "text"}, }); } @@ -1225,10 +1228,10 @@ json convert_responses_to_chatcmpl(const json & response_body) { {"tool_calls", json::array({ json { {"function", json { 
{"arguments", item.at("arguments")}, - {"name", item.at("name")} + {"name", item.at("name")}, }}, {"id", item.at("call_id")}, - {"type", "function"} + {"type", "function"}, }})}, }; @@ -1248,7 +1251,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { chatcmpl_messages.push_back(json { {"content", item.at("output")}, {"role", "tool"}, - {"tool_call_id", item.at("call_id")} + {"tool_call_id", item.at("call_id")}, }); } else { json chatcmpl_outputs = item.at("output"); @@ -1261,7 +1264,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { chatcmpl_messages.push_back(json { {"content", chatcmpl_outputs}, {"role", "tool"}, - {"tool_call_id", item.at("call_id")} + {"tool_call_id", item.at("call_id")}, }); } } else if (// exists_and_is_string(item, "id") && @@ -1275,7 +1278,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { chatcmpl_messages.push_back(json { {"role", "assistant"}, {"content", json::array()}, - {"reasoning_content", item.at("content")[0].at("text")} + {"reasoning_content", item.at("content")[0].at("text")}, }); } else { throw std::invalid_argument("Cannot determine type of 'item'"); @@ -1285,8 +1288,8 @@ json convert_responses_to_chatcmpl(const json & response_body) { throw std::invalid_argument("'input' must be a string or array of objects"); } - // Remove unused dummy message - // (reasoning content not followed by tool calls) + // Remove unused dummy message which contains + // reasoning content not followed by tool call chatcmpl_messages.erase(std::remove_if( chatcmpl_messages.begin(), chatcmpl_messages.end(), From 42a6eb38eb362e5fb6b6e93f961912e6f6248a5e Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Tue, 20 Jan 2026 02:22:15 +0000 Subject: [PATCH 17/23] Rewrite state management --- tools/server/server-task.cpp | 331 ++++++++++++++++------------------- tools/server/server-task.h | 38 ++-- 2 files changed, 175 insertions(+), 194 deletions(-) diff --git a/tools/server/server-task.cpp 
b/tools/server/server-task.cpp index 459129ecec..75ec0f4074 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -857,7 +857,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp() { json res = { {"completed_at", t}, {"created_at", t}, - {"id", "resp_" + random_string()}, + {"id", "resp_" + oaicompat_cmpl_id.substr(9)}, {"model", oaicompat_model}, {"object", "response"}, {"output", output}, @@ -877,16 +877,8 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { std::vector output; if (oaicompat_msg.reasoning_content != "") { - const auto reasoning_id_it = std::find_if( - openai_responses_item_ids.begin(), - openai_responses_item_ids.end(), - [](const std::string & id){ return string_starts_with(id, "rs_"); } - ); - GGML_ASSERT(reasoning_id_it != openai_responses_item_ids.end()); - const std::string reasoning_id = *reasoning_id_it; - const json output_item = json { - {"id", reasoning_id}, + {"id", oai_resp_reasoning_id}, {"summary", json::array()}, {"type", "reasoning"}, {"content", json::array({ json { @@ -907,18 +899,11 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { } if (oaicompat_msg.content != "") { - const auto message_id_it = std::find_if( - openai_responses_item_ids.begin(), - openai_responses_item_ids.end(), - [](const std::string & id){ return string_starts_with(id, "msg_"); } - ); - GGML_ASSERT(message_id_it != openai_responses_item_ids.end()); - const std::string message_id = *message_id_it; server_sent_events.push_back(json { {"event", "response.output_text.done"}, {"data", json { {"type", "response.output_text.done"}, - {"item_id", message_id}, + {"item_id", oai_resp_message_id}, {"text", oaicompat_msg.content} }} }); @@ -934,14 +919,14 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { {"event", "response.content_part.done"}, {"data", json { {"type", "response.content_part.done"}, - {"item_id", message_id}, + {"item_id", oai_resp_message_id}, 
{"part", content_part} }} }); const json output_item = { {"type", "message"}, {"status", "completed"}, - {"id", message_id}, + {"id", oai_resp_message_id}, {"content", json::array({content_part})}, {"role", "assistant"} }; @@ -961,7 +946,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { {"type", "function_call"}, {"status", "completed"}, {"arguments", tool_call.arguments}, - {"call_id", tool_call.id}, + {"call_id", "fc_" + tool_call.id}, {"name", tool_call.name} }; server_sent_events.push_back(json { @@ -980,7 +965,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { {"data", json { {"type", "response.completed"}, {"response", json { - {"id", openai_responses_item_ids[0]}, + {"id", "resp_" + oaicompat_cmpl_id.substr(9)}, {"object", "response"}, {"created_at", t}, {"status", "completed"}, @@ -1258,168 +1243,27 @@ void server_task_result_cmpl_partial::update(task_result_state & state) { is_updated = true; state.update_chat_msg(content, true, oaicompat_msg_diffs); - if (res_type == TASK_RESPONSE_TYPE_OAI_RESP) { - if (state.openai_responses_item_ids.empty()) { - // Create response object - const std::string response_id = "resp_" + random_string(); - openai_responses_current_events.push_back(json { - {"event", "response.created"}, - {"data", json { - {"type", "response.created"}, - {"response", json { - {"id", response_id}, - {"object", "response"}, - {"status", "in_progress"}, - }}, - }}, - }); - openai_responses_current_events.push_back(json { - {"event", "response.in_progress"}, - {"data", json { - {"type", "response.in_progress"}, - {"response", json { - {"id", response_id}, - {"object", "response"}, - {"status", "in_progress"}, - }}, - }}, - }); - state.openai_responses_item_ids.push_back(response_id); - } - - for (const common_chat_msg_diff & diff : oaicompat_msg_diffs) { - if (!diff.reasoning_content_delta.empty()) { - std::string resoning_id; - const std::string prev_item_id = 
state.openai_responses_item_ids.back(); - if (string_starts_with(prev_item_id, "rs_")) { - resoning_id = state.openai_responses_item_ids.back(); - } else { - // Add new reasoning output_item - - GGML_ASSERT(string_starts_with(prev_item_id, "resp_")); - // Reasoning item should be generated right after the reposonse object is created - - resoning_id = "rs_" + random_string(); - openai_responses_current_events.push_back(json { - {"event", "response.output_item.added"}, - {"data", json { - {"type", "response.output_item.added"}, - {"item", json { - {"id", resoning_id}, - {"summary", json::array()}, - {"type", "reasoning"}, - {"content", json::array()}, - {"encrypted_content", ""}, - {"status", "in_progress"}, - }}, - }}, - }); - state.openai_responses_item_ids.push_back(resoning_id); - } - openai_responses_current_events.push_back(json { - {"event", "response.reasoning_text.delta"}, - {"data", json { - {"delta", diff.reasoning_content_delta}, - {"item_id", resoning_id}, - {"type", "response.reasoning_text.delta"}, - }}, - }); - } - if (!diff.content_delta.empty()) { - std::string message_id; - if (string_starts_with(state.openai_responses_item_ids.back(), "msg_")) { - message_id = state.openai_responses_item_ids.back(); - } else { - message_id = "msg_" + random_string(); - openai_responses_current_events.push_back(json { - {"event", "response.output_item.added"}, - {"data", json { - {"type", "response.output_item.added"}, - {"item", json { - {"content", json::array()}, - {"id", message_id}, - {"role", "assistant"}, - {"status", "in_progress"}, - {"type", "message"}, - }}, - }}, - }); - openai_responses_current_events.push_back(json { - {"event", "response.content_part.added"}, - {"data", json { - {"type", "response.content_part.added"}, - {"item_id", message_id}, - {"part", json { - {"type", "output_text"}, - {"text", ""}, - }}, - }}, - }); - state.openai_responses_item_ids.push_back(message_id); - } - openai_responses_current_events.push_back(json { - {"event", 
"response.output_text.delta"}, - {"data", json { - {"type", "response.output_text.delta"}, - {"item_id", message_id}, - {"delta", diff.content_delta}, - }}, - }); - } - if (!diff.tool_call_delta.name.empty() && - !string_starts_with(state.generated_tool_call_ids.back(), "fc_")) { - // Add new function call output_item - // This fails to detect new item if there are >1 consecutive function calls - - const std::string function_call_id = "fc_" + state.generated_tool_call_ids.back(); - state.generated_tool_call_ids.back() = function_call_id; - openai_responses_current_events.push_back(json { - {"event", "response.output_item.added"}, - {"data", json { - {"type", "response.output_item.added"}, - {"item", json { - {"arguments", ""}, - {"call_id", function_call_id}, - {"name", diff.tool_call_delta.name}, - {"type", "function_call"}, - {"status", "in_progress"}, - }}, - }}, - }); - state.openai_responses_item_ids.push_back(function_call_id); - } - if (!diff.tool_call_delta.arguments.empty()) { - const std::string prev_item_id = state.openai_responses_item_ids.back(); - GGML_ASSERT(string_starts_with(prev_item_id, "fc_")); - - openai_responses_current_events.push_back(json { - {"event", "response.function_call_arguments.delta"}, - {"data", json { - {"delta", diff.tool_call_delta.arguments}, - {"item_id", prev_item_id}, - {"type", "response.function_call_arguments.delta"}, - }}, - }); - } - } + // Copy current state for use in to_json_*() (reflects state BEFORE this chunk) + thinking_block_started = state.thinking_block_started; + text_block_started = state.text_block_started; - return; - } + oai_resp_reasoning_id = state.oai_resp_reasoning_id; + oai_resp_message_id = state.oai_resp_message_id; + oai_resp_fc_id = state.oai_resp_fc_id; // track if the accumulated message has any reasoning content anthropic_has_reasoning = !state.chat_msg.reasoning_content.empty(); - // Copy current state for use in to_json_anthropic() (reflects state BEFORE this chunk) - 
anthropic_thinking_block_started = state.anthropic_thinking_block_started; - anthropic_text_block_started = state.anthropic_text_block_started; - // Pre-compute state updates based on diffs (for next chunk) - for (const auto & diff : oaicompat_msg_diffs) { - if (!diff.reasoning_content_delta.empty() && !state.anthropic_thinking_block_started) { - state.anthropic_thinking_block_started = true; + for (const common_chat_msg_diff & diff : oaicompat_msg_diffs) { + if (!diff.reasoning_content_delta.empty() && !state.thinking_block_started) { + state.thinking_block_started = true; + } + if (!diff.content_delta.empty() && !state.text_block_started) { + state.text_block_started = true; } - if (!diff.content_delta.empty() && !state.anthropic_text_block_started) { - state.anthropic_text_block_started = true; + if (!diff.tool_call_delta.name.empty()) { + state.oai_resp_fc_id = diff.tool_call_delta.id; } } } @@ -1434,7 +1278,7 @@ json server_task_result_cmpl_partial::to_json() { case TASK_RESPONSE_TYPE_OAI_CHAT: return to_json_oaicompat_chat(); case TASK_RESPONSE_TYPE_OAI_RESP: - return openai_responses_current_events; + return to_json_oaicompat_resp(); case TASK_RESPONSE_TYPE_ANTHROPIC: return to_json_anthropic(); default: @@ -1559,6 +1403,133 @@ json server_task_result_cmpl_partial::to_json_oaicompat_chat() { return deltas; } +json server_task_result_cmpl_partial::to_json_oaicompat_resp() { + std::vector events; + + if (n_decoded == 1) { + const std::string response_id = "resp_" + oaicompat_cmpl_id.substr(9); + events.push_back(json { + {"event", "response.created"}, + {"data", json { + {"type", "response.created"}, + {"response", json { + {"id", response_id}, + {"object", "response"}, + {"status", "in_progress"}, + }}, + }}, + }); + events.push_back(json { + {"event", "response.in_progress"}, + {"data", json { + {"type", "response.in_progress"}, + {"response", json { + {"id", response_id}, + {"object", "response"}, + {"status", "in_progress"}, + }}, + }}, + }); + } + + for 
(const common_chat_msg_diff & diff : oaicompat_msg_diffs) { + if (!diff.reasoning_content_delta.empty()) { + if (!thinking_block_started) { + events.push_back(json { + {"event", "response.output_item.added"}, + {"data", json { + {"type", "response.output_item.added"}, + {"item", json { + {"id", oai_resp_reasoning_id}, + {"summary", json::array()}, + {"type", "reasoning"}, + {"content", json::array()}, + {"encrypted_content", ""}, + {"status", "in_progress"}, + }}, + }}, + }); + thinking_block_started = true; + } + events.push_back(json { + {"event", "response.reasoning_text.delta"}, + {"data", json { + {"delta", diff.reasoning_content_delta}, + {"item_id", oai_resp_reasoning_id}, + {"type", "response.reasoning_text.delta"}, + }}, + }); + } + + if (!diff.content_delta.empty()) { + if (!text_block_started) { + events.push_back(json { + {"event", "response.output_item.added"}, + {"data", json { + {"type", "response.output_item.added"}, + {"item", json { + {"content", json::array()}, + {"id", oai_resp_message_id}, + {"role", "assistant"}, + {"status", "in_progress"}, + {"type", "message"}, + }}, + }}, + }); + events.push_back(json { + {"event", "response.content_part.added"}, + {"data", json { + {"type", "response.content_part.added"}, + {"item_id", oai_resp_message_id}, + {"part", json { + {"type", "output_text"}, + {"text", ""}, + }}, + }}, + }); + text_block_started = true; + } + events.push_back(json { + {"event", "response.output_text.delta"}, + {"data", json { + {"type", "response.output_text.delta"}, + {"item_id", oai_resp_message_id}, + {"delta", diff.content_delta}, + }}, + }); + } + + if (!diff.tool_call_delta.name.empty()) { + events.push_back(json { + {"event", "response.output_item.added"}, + {"data", json { + {"type", "response.output_item.added"}, + {"item", json { + {"arguments", ""}, + {"call_id", "fc_" + diff.tool_call_delta.id}, + {"name", diff.tool_call_delta.name}, + {"type", "function_call"}, + {"status", "in_progress"}, + }}, + }}, + }); + 
oai_resp_fc_id = diff.tool_call_delta.id; + } + + if (!diff.tool_call_delta.arguments.empty()) { + events.push_back(json { + {"event", "response.function_call_arguments.delta"}, + {"data", json { + {"delta", diff.tool_call_delta.arguments}, + {"item_id", "fc_" + oai_resp_fc_id}, + {"type", "response.function_call_arguments.delta"}, + }}, + }); + } + } + return events; +} + // // server_task_result_embd // @@ -1629,8 +1600,8 @@ json server_task_result_cmpl_partial::to_json_anthropic() { // use local copies of streaming state (copied from task_result_state in update()) // these reflect the state BEFORE this chunk was processed - bool thinking_started = anthropic_thinking_block_started; - bool text_started = anthropic_text_block_started; + bool thinking_started = thinking_block_started; + bool text_started = text_block_started; for (const auto & diff : oaicompat_msg_diffs) { // handle thinking/reasoning content diff --git a/tools/server/server-task.h b/tools/server/server-task.h index 2a44b0824b..471f51df9a 100644 --- a/tools/server/server-task.h +++ b/tools/server/server-task.h @@ -99,13 +99,15 @@ struct task_result_state { std::string generated_text; // append new chunks of generated text here std::vector generated_tool_call_ids; - // for OpenAI Responses API - // contains "resp_...", "rs_...", "fc_...", and "msg_..." 
generated during streaming - std::vector openai_responses_item_ids; + // for OpenAI Responses and Anthropic streaming API: + // track output item / content block state across chunks + bool thinking_block_started = false; + bool text_block_started = false; - // for Anthropic API streaming: track content block state across chunks - bool anthropic_thinking_block_started = false; - bool anthropic_text_block_started = false; + // for OpenAI Responses streaming API + const std::string oai_resp_reasoning_id = "rs_" + random_string(); + const std::string oai_resp_message_id = "msg_" + random_string(); + std::string oai_resp_fc_id; // function call ID for current args delta task_result_state(const common_chat_parser_params & chat_parser_params) : chat_parser_params(chat_parser_params) {} @@ -357,8 +359,9 @@ struct server_task_result_cmpl_final : server_task_result { std::vector oaicompat_msg_diffs; // to be populated by update() bool is_updated = false; - // to be copied from task_result_state by update() - std::vector openai_responses_item_ids; + // for OpenAI Responses API + std::string oai_resp_reasoning_id; + std::string oai_resp_message_id; virtual bool is_stop() override { return true; // in stream mode, final responses are considered stop @@ -369,7 +372,9 @@ struct server_task_result_cmpl_final : server_task_result { virtual void update(task_result_state & state) override { is_updated = true; oaicompat_msg = state.update_chat_msg(content, false, oaicompat_msg_diffs); - openai_responses_item_ids = state.openai_responses_item_ids; + + oai_resp_reasoning_id = state.oai_resp_reasoning_id; + oai_resp_message_id = state.oai_resp_message_id; } json to_json_non_oaicompat(); @@ -410,14 +415,17 @@ struct server_task_result_cmpl_partial : server_task_result { std::vector oaicompat_msg_diffs; // to be populated by update() bool is_updated = false; - // for OpenAI Responses API: Events emitted by current chunk - std::vector openai_responses_current_events; + // Streaming state 
copied from task_result_state for this chunk + bool thinking_block_started = false; + bool text_block_started = false; + + // for OpenAI Responses API + std::string oai_resp_reasoning_id; + std::string oai_resp_message_id; + std::string oai_resp_fc_id; // for Anthropic API: track if any reasoning content has been generated bool anthropic_has_reasoning = false; - // Streaming state copied from task_result_state for this chunk - bool anthropic_thinking_block_started = false; - bool anthropic_text_block_started = false; virtual bool is_stop() override { return false; // in stream mode, partial responses are not considered stop @@ -433,6 +441,8 @@ struct server_task_result_cmpl_partial : server_task_result { json to_json_oaicompat_chat(); + json to_json_oaicompat_resp(); + json to_json_anthropic(); }; From 5e1f65c01b42f3792a19ad42d589259cd1963039 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Tue, 20 Jan 2026 06:48:31 +0000 Subject: [PATCH 18/23] catch up with upstream/master --- tools/server/server-context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index 3e3b230591..9a828e1eff 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -3588,7 +3588,7 @@ void server_routes::init_routes() { json body = convert_responses_to_chatcmpl(json::parse(req.body)); json body_parsed = oaicompat_chat_params_parse( body, - ctx_server.oai_parser_opt, + meta->chat_params, files); return handle_completions_impl( req, From 951fe420a93fe28665fc6666e11bc5250340e2d4 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Tue, 20 Jan 2026 07:03:10 +0000 Subject: [PATCH 19/23] Fix style - "type" is the first item of SSE data --- tools/server/server-task.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index 75ec0f4074..dfc5a9b342 100644 --- a/tools/server/server-task.cpp +++ 
b/tools/server/server-task.cpp @@ -1454,9 +1454,9 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() { events.push_back(json { {"event", "response.reasoning_text.delta"}, {"data", json { + {"type", "response.reasoning_text.delta"}, {"delta", diff.reasoning_content_delta}, {"item_id", oai_resp_reasoning_id}, - {"type", "response.reasoning_text.delta"}, }}, }); } @@ -1520,9 +1520,9 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() { events.push_back(json { {"event", "response.function_call_arguments.delta"}, {"data", json { + {"type", "response.function_call_arguments.delta"}, {"delta", diff.tool_call_delta.arguments}, {"item_id", "fc_" + oai_resp_fc_id}, - {"type", "response.function_call_arguments.delta"}, }}, }); } From ebb643863d2aae0448b165a68173d564e47dd942 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Tue, 20 Jan 2026 13:48:12 +0000 Subject: [PATCH 20/23] Explicitly check "instructions" from response_body --- tools/server/server-common.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index f60cb3e285..d81cb85f42 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1082,11 +1082,10 @@ json convert_responses_to_chatcmpl(const json & response_body) { chatcmpl_body.erase("input"); std::vector chatcmpl_messages; - const std::string instructions = json_value(response_body, "instructions", std::string()); - if (instructions != "") { + if (response_body.contains("instructions")) { chatcmpl_messages.push_back({ {"role", "system"}, - {"content", instructions}, + {"content", json_value(response_body, "instructions", std::string())}, }); chatcmpl_body.erase("instructions"); } From cf83e1abcce136b4f9adcbaa9a4fc59fe25de2d3 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Tue, 20 Jan 2026 13:48:39 +0000 Subject: [PATCH 21/23] Make lambdas static --- tools/server/server-common.cpp | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index d81cb85f42..ccf90329b3 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1099,10 +1099,10 @@ json convert_responses_to_chatcmpl(const json & response_body) { } else if (input_value.is_array()) { // #responses_create-input-input_item_list - const auto exists_and_is_array = [](const json & j, const char * key) -> bool { + static auto exists_and_is_array = [](const json & j, const char * key) -> bool { return j.contains(key) && j.at(key).is_array(); }; - const auto exists_and_is_string = [](const json & j, const char * key) -> bool { + static auto exists_and_is_string = [](const json & j, const char * key) -> bool { return j.contains(key) && j.at(key).is_string(); }; From 0d5e3deedb210a84465c9cca8c584f428eb8e393 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Tue, 20 Jan 2026 13:48:58 +0000 Subject: [PATCH 22/23] Check if reasoning content exists --- tools/server/server-common.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index ccf90329b3..ebf0c2ef8c 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1273,6 +1273,16 @@ json convert_responses_to_chatcmpl(const json & response_body) { item.at("type") == "reasoning") { // #responses_create-input-input_item_list-item-reasoning + if (!exists_and_is_array(item, "content")) { + throw std::invalid_argument("item['content'] is not an array"); + } + if (item.at("content").empty()) { + throw std::invalid_argument("item['content'] is empty"); + } + if (!exists_and_is_string(item.at("content")[0], "text")) { + throw std::invalid_argument("item['content']['text'] is not a string"); + } + // Pack reasoning content in dummy message chatcmpl_messages.push_back(json { {"role", "assistant"}, From 5ac23d2f8f00451080cd774f378e36c6e7e88fdc Mon Sep 17 00:00:00 2001 From: 
openingnow <> Date: Tue, 20 Jan 2026 13:47:07 +0000 Subject: [PATCH 23/23] Add `oai_resp_id` to task_result_state(also initialized at ctor), server_task_result_cmpl_partial, and server_task_result_cmpl_final --- tools/server/server-task.cpp | 18 +++++++++--------- tools/server/server-task.h | 15 +++++++++++---- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index dfc5a9b342..bbe49ad2aa 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -857,7 +857,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp() { json res = { {"completed_at", t}, {"created_at", t}, - {"id", "resp_" + oaicompat_cmpl_id.substr(9)}, + {"id", oai_resp_id}, {"model", oaicompat_model}, {"object", "response"}, {"output", output}, @@ -965,7 +965,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { {"data", json { {"type", "response.completed"}, {"response", json { - {"id", "resp_" + oaicompat_cmpl_id.substr(9)}, + {"id", oai_resp_id}, {"object", "response"}, {"created_at", t}, {"status", "completed"}, @@ -1245,11 +1245,12 @@ void server_task_result_cmpl_partial::update(task_result_state & state) { // Copy current state for use in to_json_*() (reflects state BEFORE this chunk) thinking_block_started = state.thinking_block_started; - text_block_started = state.text_block_started; + text_block_started = state.text_block_started; - oai_resp_reasoning_id = state.oai_resp_reasoning_id; - oai_resp_message_id = state.oai_resp_message_id; - oai_resp_fc_id = state.oai_resp_fc_id; + oai_resp_id = state.oai_resp_id; + oai_resp_reasoning_id = state.oai_resp_reasoning_id; + oai_resp_message_id = state.oai_resp_message_id; + oai_resp_fc_id = state.oai_resp_fc_id; // track if the accumulated message has any reasoning content anthropic_has_reasoning = !state.chat_msg.reasoning_content.empty(); @@ -1407,13 +1408,12 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() 
{ std::vector events; if (n_decoded == 1) { - const std::string response_id = "resp_" + oaicompat_cmpl_id.substr(9); events.push_back(json { {"event", "response.created"}, {"data", json { {"type", "response.created"}, {"response", json { - {"id", response_id}, + {"id", oai_resp_id}, {"object", "response"}, {"status", "in_progress"}, }}, @@ -1424,7 +1424,7 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() { {"data", json { {"type", "response.in_progress"}, {"response", json { - {"id", response_id}, + {"id", oai_resp_id}, {"object", "response"}, {"status", "in_progress"}, }}, diff --git a/tools/server/server-task.h b/tools/server/server-task.h index 471f51df9a..244470596b 100644 --- a/tools/server/server-task.h +++ b/tools/server/server-task.h @@ -105,12 +105,16 @@ struct task_result_state { bool text_block_started = false; // for OpenAI Responses streaming API - const std::string oai_resp_reasoning_id = "rs_" + random_string(); - const std::string oai_resp_message_id = "msg_" + random_string(); + const std::string oai_resp_id; + const std::string oai_resp_reasoning_id; + const std::string oai_resp_message_id; std::string oai_resp_fc_id; // function call ID for current args delta task_result_state(const common_chat_parser_params & chat_parser_params) - : chat_parser_params(chat_parser_params) {} + : chat_parser_params(chat_parser_params) + , oai_resp_id("resp_" + random_string()) + , oai_resp_reasoning_id("rs_" + random_string()) + , oai_resp_message_id("msg_" + random_string()) {} // parse partial tool calls and update the internal state common_chat_msg update_chat_msg( @@ -360,6 +364,7 @@ struct server_task_result_cmpl_final : server_task_result { bool is_updated = false; // for OpenAI Responses API + std::string oai_resp_id; std::string oai_resp_reasoning_id; std::string oai_resp_message_id; @@ -373,6 +378,7 @@ struct server_task_result_cmpl_final : server_task_result { is_updated = true; oaicompat_msg = state.update_chat_msg(content, false, 
oaicompat_msg_diffs); + oai_resp_id = state.oai_resp_id; oai_resp_reasoning_id = state.oai_resp_reasoning_id; oai_resp_message_id = state.oai_resp_message_id; } @@ -417,9 +423,10 @@ struct server_task_result_cmpl_partial : server_task_result { // Streaming state copied from task_result_state for this chunk bool thinking_block_started = false; - bool text_block_started = false; + bool text_block_started = false; // for OpenAI Responses API + std::string oai_resp_id; std::string oai_resp_reasoning_id; std::string oai_resp_message_id; std::string oai_resp_fc_id;