From 10c38c18e42bd9fd4ebcd514f06bad233ef899ca Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Sat, 20 Dec 2025 14:04:59 +0000 Subject: [PATCH 01/15] server: Implement /v1/responses (text generation only) --- tools/server/server-common.cpp | 55 ++++++++++++ tools/server/server-common.h | 5 ++ tools/server/server-context.cpp | 87 +++++++++++++++++-- tools/server/server-context.h | 1 + tools/server/server-task.cpp | 147 ++++++++++++++++++++++++++++++++ tools/server/server-task.h | 7 ++ tools/server/server.cpp | 1 + 7 files changed, 296 insertions(+), 7 deletions(-) diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index ab6b3aa7ce..2c29bb74e8 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1082,6 +1082,43 @@ json oaicompat_chat_params_parse( return llama_params; } +json convert_responses_to_chatcmpl(const json & body) { + if (!body.contains("input")) { + throw std::invalid_argument("'input' is required"); + } + if (!json_value(body, "previous_response_id", std::string{}).empty()) { + throw std::invalid_argument("llama.cpp does not support 'previous_response_id'."); + } + + const json input_value = body.at("input"); + json chatcmpl_messages = json::array(); + + if (input_value.is_array()) { + chatcmpl_messages = input_value; + } else if (input_value.is_string()) { + chatcmpl_messages.push_back({ + {"role", "user"}, + {"content", input_value}, + }); + } else { + std::invalid_argument("'input' must be a string or array of objects"); + } + + const std::string instructions = json_value(body, "instructions", std::string{}); + if (instructions != "") { + chatcmpl_messages.push_back({ + {"role", "system"}, + {"content", instructions}, + }); + } + + json chatcmpl_body = body; + chatcmpl_body.erase("input"); + chatcmpl_body["messages"] = chatcmpl_messages; + + return chatcmpl_body; +} + json convert_anthropic_to_oai(const json & body) { json oai_body; @@ -1485,6 +1522,24 @@ std::string format_oai_sse(const json 
& data) { return ss.str(); } +std::string format_oai_resp_sse(const json & data) { + std::ostringstream ss; + auto send_single = [&ss](const json & event_obj) { + ss << "event: " << event_obj.at("event").get<std::string>() << "\n"; + ss << "data: " << safe_json_to_str(event_obj.at("data")) << "\n\n"; + }; + + if (data.is_array()) { + for (const auto & item : data) { + send_single(item); + } + } else { + send_single(data); + } + + return ss.str(); +} + std::string format_anthropic_sse(const json & data) { std::ostringstream ss; diff --git a/tools/server/server-common.h b/tools/server/server-common.h index 0629bb5edd..88f7cc7014 100644 --- a/tools/server/server-common.h +++ b/tools/server/server-common.h @@ -297,6 +297,9 @@ json oaicompat_chat_params_parse( const oaicompat_parser_options & opt, std::vector<raw_buffer> & out_files); +// convert OpenAI Responses API format to OpenAI Chat Completions API format +json convert_responses_to_chatcmpl(const json & body); + // convert Anthropic Messages API format to OpenAI Chat Completions API format json convert_anthropic_to_oai(const json & body); @@ -333,6 +336,8 @@ std::string tokens_to_output_formatted_string(const llama_context * ctx, const l // note: if data is a json array, it will be sent as multiple events, one per item std::string format_oai_sse(const json & data); +std::string format_oai_resp_sse(const json & data); + // format Anthropic-style SSE with event types std::string format_anthropic_sse(const json & data); diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index 68a5fd8ab0..a4ef4ea12e 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -2842,6 +2842,58 @@ static std::unique_ptr handle_completions_impl( json first_result_json = first_result->to_json(); if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) { res->data = format_anthropic_sse(first_result_json); + } else if (res_type == TASK_RESPONSE_TYPE_OAI_RESP) { + const json created = { + {"event", "response.created"}, 
{"data", json { + {"type", "response.created"}, + {"response", json { + {"object", "response"}, + {"status", "in_progress"} + }} + }} + }; + const json in_progress = { + {"event", "response.in_progress"}, + {"data", json { + {"type", "response.in_progress"}, + {"response", json { + {"object", "response"}, + {"status", "in_progress"} + }} + }} + }; + const json output_item_added = { + {"event", "response.output_item.added"}, + {"data", json { + {"type", "response.output_item.added"}, + {"item", json { + {"type", "message"}, + {"status", "in_progress"}, + {"content", json::array()}, + {"role", "assistant"} + }} + }} + }; + const json content_part_added = { + {"event", "response.content_part.added"}, + {"data", json { + {"type", "response.content_part.added"}, + {"part", json { + {"type", "output_text"}, + {"text", ""} + }} + }} + }; + + std::string res_data; + res_data += format_oai_resp_sse(created); + res_data += format_oai_resp_sse(in_progress); + res_data += format_oai_resp_sse(output_item_added); + res_data += format_oai_resp_sse(content_part_added); + res_data += format_oai_resp_sse(first_result_json); + + res->data = res_data; } else { res->data = format_oai_sse(first_result_json); } @@ -2876,13 +2928,16 @@ static std::unique_ptr handle_completions_impl( // check if there is more data if (!rd.has_next()) { - if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) { - // Anthropic doesn't send [DONE], message_stop was already sent - output = ""; - } else if (res_type != TASK_RESPONSE_TYPE_NONE) { - output = "data: [DONE]\n\n"; - } else { - output = ""; + switch (res_type) { + case TASK_RESPONSE_TYPE_NONE: + case TASK_RESPONSE_TYPE_OAI_RESP: + case TASK_RESPONSE_TYPE_ANTHROPIC: + output = ""; + break; + + default: + output = "data: [DONE]\n\n"; + break; } SRV_DBG("%s", "all results received, terminating stream\n"); return false; // no more data, terminate @@ -2909,6 +2964,8 @@ static std::unique_ptr handle_completions_impl( json res_json = result->to_json(); if (res_type 
== TASK_RESPONSE_TYPE_ANTHROPIC) { output = format_anthropic_sse(res_json); + } else if (res_type == TASK_RESPONSE_TYPE_OAI_RESP) { + output = format_oai_resp_sse(res_json); } else { output = format_oai_sse(res_json); } @@ -3320,6 +3377,22 @@ void server_routes::init_routes() { TASK_RESPONSE_TYPE_OAI_CHAT); }; + this->post_responses_oai = [this](const server_http_req & req) { + std::vector files; + json body = convert_responses_to_chatcmpl(json::parse(req.body)); + json body_parsed = oaicompat_chat_params_parse( + body, + ctx_server.oai_parser_opt, + files); + return handle_completions_impl( + ctx_server, + SERVER_TASK_TYPE_COMPLETION, + body_parsed, + files, + req.should_stop, + TASK_RESPONSE_TYPE_OAI_RESP); + }; + this->post_anthropic_messages = [this](const server_http_req & req) { std::vector files; json body = convert_anthropic_to_oai(json::parse(req.body)); diff --git a/tools/server/server-context.h b/tools/server/server-context.h index 230b25952e..5e135a6a5e 100644 --- a/tools/server/server-context.h +++ b/tools/server/server-context.h @@ -69,6 +69,7 @@ struct server_routes { server_http_context::handler_t post_completions; server_http_context::handler_t post_completions_oai; server_http_context::handler_t post_chat_completions; + server_http_context::handler_t post_responses_oai; server_http_context::handler_t post_anthropic_messages; server_http_context::handler_t post_anthropic_count_tokens; server_http_context::handler_t post_apply_template; diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index 360826062b..54177cbcdc 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -580,6 +580,8 @@ json server_task_result_cmpl_final::to_json() { return to_json_oaicompat(); case TASK_RESPONSE_TYPE_OAI_CHAT: return stream ? to_json_oaicompat_chat_stream() : to_json_oaicompat_chat(); + case TASK_RESPONSE_TYPE_OAI_RESP: + return stream ? 
to_json_oaicompat_resp_stream() : to_json_oaicompat_resp(); case TASK_RESPONSE_TYPE_ANTHROPIC: return stream ? to_json_anthropic_stream() : to_json_anthropic(); default: @@ -797,6 +799,122 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() { return deltas; } +json server_task_result_cmpl_final::to_json_oaicompat_resp() { + common_chat_msg msg; + if (!oaicompat_msg.empty()) { + msg = oaicompat_msg; + } else { + msg.role = "assistant"; + msg.content = content; + } + + const json reasoning = { + {"type", "reasoning"}, + {"summary", json::array({json { + {"type", "summary_text"}, + {"text", msg.reasoning_content} + }})} + }; + const json message = { + {"type", "message"}, + {"status", "completed"}, + {"content", json::array({json { + {"type", "output_text"}, + {"annotations", json::array()}, + {"logprobs", json::array()}, + {"text", msg.content} + }})}, + {"role", msg.role} + }; + + std::time_t t = std::time(0); + json res = { + {"object", "response"}, + {"created_at", t}, + {"status", "completed"}, + {"model", oaicompat_model}, + {"output", json::array({reasoning, message})}, + {"usage", json { + {"input_tokens", n_prompt_tokens}, + {"output_tokens", n_decoded}, + {"total_tokens", n_decoded + n_prompt_tokens} + }}, + }; + + if (verbose) { + res["__verbose"] = to_json_non_oaicompat(); + } + if (timings.prompt_n >= 0) { + res.push_back({"timings", timings.to_json()}); + } + + return res; +} + +json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { + json server_sent_events = json::array(); + + server_sent_events.push_back(json { + {"event", "response.output_text.done"}, + {"data", json { + {"type", "response.output_text.done"}, + {"text", oaicompat_msg.content} + }} + }); + + const json part = { + {"type", "output_text"}, + {"annotations", json::array()}, + {"logprobs", json::array()}, + {"text", oaicompat_msg.content} + }; + + server_sent_events.push_back(json { + {"event", "response.content_part.done"}, + {"data", json { + {"type", 
"response.content_part.done"}, + {"part", part} + }} + }); + + const json item = { + {"type", "message"}, + {"status", "completed"}, + {"content", json::array({part})}, + {"role", "assistant"} + }; + + server_sent_events.push_back(json { + {"event", "response.output_item.done"}, + {"data", json { + {"type", "response.output_item.done"}, + {"item", item} + }} + }); + + std::time_t t = std::time(0); + server_sent_events.push_back(json { + {"event", "response.completed"}, + {"data", json { + {"type", "response.completed"}, + {"response", json { + {"object", "response"}, + {"created_at", t}, + {"status", "completed"}, + {"model", oaicompat_model}, + {"output", json::array({item})}, + {"usage", json { + {"input_tokens", n_prompt_tokens}, + {"output_tokens", n_decoded}, + {"total_tokens", n_decoded + n_prompt_tokens} + }} + }}, + }} + }); + + return server_sent_events; +} + json server_task_result_cmpl_final::to_json_anthropic() { std::string stop_reason = "max_tokens"; if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) { @@ -991,6 +1109,8 @@ json server_task_result_cmpl_partial::to_json() { return to_json_oaicompat(); case TASK_RESPONSE_TYPE_OAI_CHAT: return to_json_oaicompat_chat(); + case TASK_RESPONSE_TYPE_OAI_RESP: + return to_json_oaicompat_resp(); case TASK_RESPONSE_TYPE_ANTHROPIC: return to_json_anthropic(); default: @@ -1115,6 +1235,33 @@ json server_task_result_cmpl_partial::to_json_oaicompat_chat() { return deltas; } +json server_task_result_cmpl_partial::to_json_oaicompat_resp() { + std::vector deltas; + + for (const auto & diff : oaicompat_msg_diffs) { + if (!diff.reasoning_content_delta.empty()) { + deltas.push_back(json { + {"event", "response.reasoning_text.delta"}, + {"data", json { + {"type", "response.reasoning_text.delta"}, + {"delta", diff.reasoning_content_delta} + }} + }); + } + if (!diff.content_delta.empty()) { + deltas.push_back(json { + {"event", "response.output_text.delta"}, + {"data", json { + {"type", "response.output_text.delta"}, + 
{"delta", diff.content_delta} + }} + }); + } + } + + return deltas; +} + // // server_task_result_embd // diff --git a/tools/server/server-task.h b/tools/server/server-task.h index 0759094a01..6c64be3147 100644 --- a/tools/server/server-task.h +++ b/tools/server/server-task.h @@ -31,6 +31,7 @@ enum task_response_type { TASK_RESPONSE_TYPE_NONE, // llama.cpp native format TASK_RESPONSE_TYPE_OAI_CHAT, TASK_RESPONSE_TYPE_OAI_CMPL, + TASK_RESPONSE_TYPE_OAI_RESP, TASK_RESPONSE_TYPE_OAI_EMBD, TASK_RESPONSE_TYPE_ANTHROPIC, }; @@ -312,6 +313,10 @@ struct server_task_result_cmpl_final : server_task_result { json to_json_oaicompat_chat_stream(); + json to_json_oaicompat_resp(); + + json to_json_oaicompat_resp_stream(); + json to_json_anthropic(); json to_json_anthropic_stream(); @@ -361,6 +366,8 @@ struct server_task_result_cmpl_partial : server_task_result { json to_json_oaicompat_chat(); + json to_json_oaicompat_resp(); + json to_json_anthropic(); }; diff --git a/tools/server/server.cpp b/tools/server/server.cpp index b6b611b3f4..ce0de2f890 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -175,6 +175,7 @@ int main(int argc, char ** argv, char ** envp) { ctx_http.post("/v1/completions", ex_wrapper(routes.post_completions_oai)); ctx_http.post("/chat/completions", ex_wrapper(routes.post_chat_completions)); ctx_http.post("/v1/chat/completions", ex_wrapper(routes.post_chat_completions)); + ctx_http.post("/v1/responses", ex_wrapper(routes.post_responses_oai)); ctx_http.post("/api/chat", ex_wrapper(routes.post_chat_completions)); // ollama specific endpoint ctx_http.post("/v1/messages", ex_wrapper(routes.post_anthropic_messages)); // anthropic messages API ctx_http.post("/v1/messages/count_tokens", ex_wrapper(routes.post_anthropic_count_tokens)); // anthropic token counting From 91e39c25a82627b52ffb6a2f569c7d7eba725ebe Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Sun, 21 Dec 2025 03:48:46 +0000 Subject: [PATCH 02/15] Use `format_oai_resp_sse(array)` --- 
tools/server/server-context.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index a4ef4ea12e..dafd695b5c 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -2886,14 +2886,14 @@ static std::unique_ptr handle_completions_impl( }} }; - std::string res_data; - res_data += format_oai_resp_sse(created); - res_data += format_oai_resp_sse(in_progress); - res_data += format_oai_resp_sse(output_item_added); - res_data += format_oai_resp_sse(content_part_added); - res_data += format_oai_resp_sse(first_result_json); - - res->data = res_data; + const json initial_events = json::array({ + created, + in_progress, + output_item_added, + content_part_added + }); + + res->data = format_oai_resp_sse(initial_events) + format_oai_resp_sse(first_result_json); } else { res->data = format_oai_sse(first_result_json); } From a49cbc80713e31ebe10e508a87dd483ce1c9e8f1 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Sun, 21 Dec 2025 03:49:02 +0000 Subject: [PATCH 03/15] Register handlers --- tools/server/server.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/server/server.cpp b/tools/server/server.cpp index ce0de2f890..b53d43031d 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -140,6 +140,7 @@ int main(int argc, char ** argv, char ** envp) { routes.post_completions = models_routes->proxy_post; routes.post_completions_oai = models_routes->proxy_post; routes.post_chat_completions = models_routes->proxy_post; + routes.post_responses_oai = models_routes->proxy_post; routes.post_anthropic_messages = models_routes->proxy_post; routes.post_anthropic_count_tokens = models_routes->proxy_post; routes.post_infill = models_routes->proxy_post; From 3b16dac1eccaf5daa7223017e7bfe0e2faf9ff91 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Sun, 21 Dec 2025 03:49:21 +0000 Subject: [PATCH 04/15] Also convert `max_output_tokens` field 
--- tools/server/server-common.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index 2c29bb74e8..671ebe5a45 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1116,6 +1116,11 @@ json convert_responses_to_chatcmpl(const json & body) { chatcmpl_body.erase("input"); chatcmpl_body["messages"] = chatcmpl_messages; + if (body.contains("max_output_tokens")) { + chatcmpl_body.erase("max_output_tokens"); + chatcmpl_body["max_tokens"] = body["max_output_tokens"]; + } + return chatcmpl_body; } From 42440932460eb550c806b35f72b5df49eb7ca831 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Sun, 21 Dec 2025 04:21:20 +0000 Subject: [PATCH 05/15] Add tests to check /v1/responses --- tools/server/tests/requirements.txt | 2 +- .../tests/unit/test_compat_oai_responses.py | 49 +++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 tools/server/tests/unit/test_compat_oai_responses.py diff --git a/tools/server/tests/requirements.txt b/tools/server/tests/requirements.txt index 4ea7f19f77..ca79d025ed 100644 --- a/tools/server/tests/requirements.txt +++ b/tools/server/tests/requirements.txt @@ -2,7 +2,7 @@ aiohttp~=3.9.3 pytest~=8.3.3 huggingface_hub>=0.34.0,<1.0 numpy~=1.26.4 -openai~=1.55.3 +openai~=2.14.0 prometheus-client~=0.20.0 requests~=2.32.3 wget~=3.2 diff --git a/tools/server/tests/unit/test_compat_oai_responses.py b/tools/server/tests/unit/test_compat_oai_responses.py new file mode 100644 index 0000000000..2437055671 --- /dev/null +++ b/tools/server/tests/unit/test_compat_oai_responses.py @@ -0,0 +1,49 @@ +import pytest +from openai import OpenAI +import openai +from utils import * + +server: ServerProcess + +@pytest.fixture(autouse=True) +def create_server(): + global server + server = ServerPreset.tinyllama2() + +def test_responses_with_openai_library(): + global server + server.start() + client = OpenAI(api_key="dummy", 
base_url=f"http://{server.server_host}:{server.server_port}/v1") + res = client.responses.create( + model="gpt-4.1", + input=[ + {"role": "system", "content": "Book"}, + {"role": "user", "content": "What is the best book"}, + ], + max_output_tokens=8, + temperature=0.8, + ) + assert match_regex("(Suddenly)+", res.output_text) + +def test_responses_stream_with_openai_library(): + global server + server.start() + client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}/v1") + stream = client.responses.create( + model="gpt-4.1", + input=[ + {"role": "system", "content": "Book"}, + {"role": "user", "content": "What is the best book"}, + ], + max_output_tokens=8, + temperature=0.8, + stream=True, + ) + + gathered_text = '' + for r in stream: + if type(r) == openai.types.responses.response_text_delta_event.ResponseTextDeltaEvent: + gathered_text += r.delta + if type(r) == openai.types.responses.response_completed_event.ResponseCompletedEvent: + assert gathered_text == r.response.output_text + assert match_regex("(Suddenly)+", r.response.output_text) From 07a43f83a3588bbcbde4264d070ae16585ebe608 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Sun, 21 Dec 2025 05:36:22 +0000 Subject: [PATCH 06/15] catch up origin/master --- tools/server/server-context.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index 023276cca6..ba904384b9 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -3455,6 +3455,7 @@ void server_routes::init_routes() { }; this->post_responses_oai = [this](const server_http_req & req) { + auto res = std::make_unique(ctx_server); std::vector files; json body = convert_responses_to_chatcmpl(json::parse(req.body)); json body_parsed = oaicompat_chat_params_parse( @@ -3462,6 +3463,7 @@ void server_routes::init_routes() { ctx_server.oai_parser_opt, files); return handle_completions_impl( + std::move(res), ctx_server, 
SERVER_TASK_TYPE_COMPLETION, body_parsed, From 18cad941b684ad8d688a81ce4d92d56ffb4ab34e Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Sun, 21 Dec 2025 08:35:34 +0000 Subject: [PATCH 07/15] Try to satisfy pyright --- tools/server/tests/unit/test_compat_oai_responses.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/server/tests/unit/test_compat_oai_responses.py b/tools/server/tests/unit/test_compat_oai_responses.py index 2437055671..28c7d2f881 100644 --- a/tools/server/tests/unit/test_compat_oai_responses.py +++ b/tools/server/tests/unit/test_compat_oai_responses.py @@ -42,8 +42,8 @@ def test_responses_stream_with_openai_library(): gathered_text = '' for r in stream: - if type(r) == openai.types.responses.response_text_delta_event.ResponseTextDeltaEvent: + if r.type == "response.output_text.delta": gathered_text += r.delta - if type(r) == openai.types.responses.response_completed_event.ResponseCompletedEvent: + if r.type == "response.completed": assert gathered_text == r.response.output_text assert match_regex("(Suddenly)+", r.response.output_text) From feb8253c6c414ffe72af3d403dfa724174ebd249 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Sun, 21 Dec 2025 09:58:05 +0000 Subject: [PATCH 08/15] Temporarily remove openai dependency from requirements-tool_bench --- requirements/requirements-tool_bench.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements/requirements-tool_bench.txt b/requirements/requirements-tool_bench.txt index f7912aff72..3813afb448 100644 --- a/requirements/requirements-tool_bench.txt +++ b/requirements/requirements-tool_bench.txt @@ -3,7 +3,6 @@ pytest~=8.3.3 huggingface_hub>=0.34.0,<1.0 matplotlib~=3.10.0 numpy~=1.26.4 -openai~=1.55.3 pandas~=2.2.3 prometheus-client~=0.20.0 requests~=2.32.3 From 5e97d1df58714c79fccb1ba7542cac91d77db6e7 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Sun, 21 Dec 2025 12:20:41 +0000 Subject: [PATCH 09/15] remove openai import --- 
tools/server/tests/unit/test_compat_oai_responses.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/server/tests/unit/test_compat_oai_responses.py b/tools/server/tests/unit/test_compat_oai_responses.py index 28c7d2f881..e168f4562d 100644 --- a/tools/server/tests/unit/test_compat_oai_responses.py +++ b/tools/server/tests/unit/test_compat_oai_responses.py @@ -1,6 +1,5 @@ import pytest from openai import OpenAI -import openai from utils import * server: ServerProcess From 0680e80db66a1adbfe32763f7d69f71cfdb4709a Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Sun, 21 Dec 2025 13:37:17 +0000 Subject: [PATCH 10/15] introduce bumped openai requirements --- requirements/requirements-tool_bench.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements/requirements-tool_bench.txt b/requirements/requirements-tool_bench.txt index 3813afb448..3bb74fb9d0 100644 --- a/requirements/requirements-tool_bench.txt +++ b/requirements/requirements-tool_bench.txt @@ -3,6 +3,7 @@ pytest~=8.3.3 huggingface_hub>=0.34.0,<1.0 matplotlib~=3.10.0 numpy~=1.26.4 +openai~=2.14.0 pandas~=2.2.3 prometheus-client~=0.20.0 requests~=2.32.3 From ca178617c629017d1f3919a527f979fde752b66d Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Mon, 22 Dec 2025 00:26:32 +0000 Subject: [PATCH 11/15] See if openai version affects CI --- requirements/requirements-tool_bench.txt | 2 +- tools/server/tests/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/requirements-tool_bench.txt b/requirements/requirements-tool_bench.txt index 3bb74fb9d0..f7912aff72 100644 --- a/requirements/requirements-tool_bench.txt +++ b/requirements/requirements-tool_bench.txt @@ -3,7 +3,7 @@ pytest~=8.3.3 huggingface_hub>=0.34.0,<1.0 matplotlib~=3.10.0 numpy~=1.26.4 -openai~=2.14.0 +openai~=1.55.3 pandas~=2.2.3 prometheus-client~=0.20.0 requests~=2.32.3 diff --git a/tools/server/tests/requirements.txt b/tools/server/tests/requirements.txt index ca79d025ed..4ea7f19f77 100644 
--- a/tools/server/tests/requirements.txt +++ b/tools/server/tests/requirements.txt @@ -2,7 +2,7 @@ aiohttp~=3.9.3 pytest~=8.3.3 huggingface_hub>=0.34.0,<1.0 numpy~=1.26.4 -openai~=2.14.0 +openai~=1.55.3 prometheus-client~=0.20.0 requests~=2.32.3 wget~=3.2 From f1aff3129407d07af6ed154d6fa9f89512110ac9 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Mon, 22 Dec 2025 02:23:23 +0000 Subject: [PATCH 12/15] Bump openai version again --- requirements/requirements-tool_bench.txt | 2 +- tools/server/tests/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/requirements-tool_bench.txt b/requirements/requirements-tool_bench.txt index f7912aff72..3bb74fb9d0 100644 --- a/requirements/requirements-tool_bench.txt +++ b/requirements/requirements-tool_bench.txt @@ -3,7 +3,7 @@ pytest~=8.3.3 huggingface_hub>=0.34.0,<1.0 matplotlib~=3.10.0 numpy~=1.26.4 -openai~=1.55.3 +openai~=2.14.0 pandas~=2.2.3 prometheus-client~=0.20.0 requests~=2.32.3 diff --git a/tools/server/tests/requirements.txt b/tools/server/tests/requirements.txt index 4ea7f19f77..ca79d025ed 100644 --- a/tools/server/tests/requirements.txt +++ b/tools/server/tests/requirements.txt @@ -2,7 +2,7 @@ aiohttp~=3.9.3 pytest~=8.3.3 huggingface_hub>=0.34.0,<1.0 numpy~=1.26.4 -openai~=1.55.3 +openai~=2.14.0 prometheus-client~=0.20.0 requests~=2.32.3 wget~=3.2 From 43055354bae4a4a6a40cf442fc4eb214a830b510 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Thu, 25 Dec 2025 02:34:09 +0000 Subject: [PATCH 13/15] catch up with origin/master --- tools/server/server-context.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index 97db4f4594..d6724f708e 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -3521,7 +3521,7 @@ void server_routes::init_routes() { }; this->post_responses_oai = [this](const server_http_req & req) { - auto res = 
std::make_unique(ctx_server); + auto res = create_response(); std::vector files; json body = convert_responses_to_chatcmpl(json::parse(req.body)); json body_parsed = oaicompat_chat_params_parse( @@ -3529,12 +3529,10 @@ void server_routes::init_routes() { ctx_server.oai_parser_opt, files); return handle_completions_impl( - std::move(res), - ctx_server, + req, SERVER_TASK_TYPE_COMPLETION, body_parsed, files, - req.should_stop, TASK_RESPONSE_TYPE_OAI_RESP); }; From 7f2ca87e3c49acd6e6adfceb288ff8821353e3e3 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Sun, 28 Dec 2025 13:37:29 +0000 Subject: [PATCH 14/15] Make instruction(system) as first message --- tools/server/server-common.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index 96ec86edbc..0605c4ecae 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1081,23 +1081,23 @@ json convert_responses_to_chatcmpl(const json & body) { const json input_value = body.at("input"); json chatcmpl_messages = json::array(); - if (input_value.is_array()) { - chatcmpl_messages = input_value; - } else if (input_value.is_string()) { + const std::string instructions = json_value(body, "instructions", std::string()); + if (instructions != "") { chatcmpl_messages.push_back({ - {"role", "user"}, - {"content", input_value}, + {"role", "system"}, + {"content", instructions}, }); - } else { - std::invalid_argument("'input' must be a string or array of objects"); } - const std::string instructions = json_value(body, "instructions", std::string{}); - if (instructions != "") { + if (input_value.is_string()) { chatcmpl_messages.push_back({ - {"role", "system"}, - {"content", instructions}, + {"role", "user"}, + {"content", input_value}, }); + } else if (input_value.is_array()) { + chatcmpl_messages = input_value; + } else { + throw std::invalid_argument("'input' must be a string or array of objects"); 
} json chatcmpl_body = body; From 02ece762fbd004276d4f5df360e6123939337313 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Sun, 28 Dec 2025 13:38:50 +0000 Subject: [PATCH 15/15] Convert [input_message] --- tools/server/server-common.cpp | 69 +++++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index 0605c4ecae..78c530f1ed 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1095,7 +1095,74 @@ json convert_responses_to_chatcmpl(const json & body) { {"content", input_value}, }); } else if (input_value.is_array()) { - chatcmpl_messages = input_value; + for (const auto & input_message : input_value) { + if (!input_message.contains("content")) { + throw std::invalid_argument("'content' is required"); + } + const json content = input_message.at("content"); + + if (content.is_string()) { + chatcmpl_messages.push_back(input_message); + } else if (content.is_array()) { + json new_content = json::array(); + + for (const auto & input_item : content) { + const std::string type = json_value(input_item, "type", std::string()); + + if (type == "input_text") { + if (!input_item.contains("text")) { + throw std::invalid_argument("'Input text' requires 'text'"); + } + new_content.push_back({ + {"text", input_item.at("text")}, + {"type", "text"} + }); + } else if (type == "input_image") { + // While `detail` is marked as required, + // it has default value("auto") and can be omitted. 
+ + if (!input_item.contains("image_url")) { + throw std::invalid_argument("'image_url' is required"); + } + new_content.push_back({ + {"image_url", json {{"url", input_item.at("image_url")}}}, + {"type", "image_url"} + }); + } else if (type == "input_file") { + if (input_item.contains("file_url")) { + throw std::invalid_argument("'file_url' is not supported"); + } + if (!input_item.contains("file_data") || !input_item.contains("filename")) { + throw std::invalid_argument("Both 'file_data' and 'filename' are required"); + } + new_content.push_back({ + {"file", json { + {"file_data", input_item.at("file_data")}, + {"filename", input_item.at("filename")}}}, + {"type", "file"} + }); + } else { + throw std::invalid_argument("'type' must be one of input_text, input_image, or input_file"); + } + } + + json new_input_message = input_message; + new_input_message["content"] = new_content; + + chatcmpl_messages.push_back(new_input_message); + } else { + throw std::invalid_argument("'content' must be a string or array of objects"); + } + + const std::string role = json_value(input_message, "role", std::string()); + if (role != "user" && role != "assistant" && role != "system" && role != "developer") { + throw std::invalid_argument("'role' must be one of user, assistant, system, or developer"); + } + + if (input_message.contains("type") && input_message.at("type") != "message") { + throw std::invalid_argument("If 'type' is defined, it should be 'message'"); + } + } } else { throw std::invalid_argument("'input' must be a string or array of objects"); }