From 47134fc172e3fbab231cfdbcb153f7bdb3c8f031 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Tue, 30 Dec 2025 05:44:08 +0000 Subject: [PATCH 01/23] from previous PR --- requirements/requirements-tool_bench.txt | 2 +- tools/server/server-common.cpp | 60 +++++++ tools/server/server-common.h | 5 + tools/server/server-context.cpp | 87 ++++++++++- tools/server/server-context.h | 1 + tools/server/server-task.cpp | 147 ++++++++++++++++++ tools/server/server-task.h | 7 + tools/server/server.cpp | 2 + tools/server/tests/requirements.txt | 2 +- .../tests/unit/test_compat_oai_responses.py | 48 ++++++ 10 files changed, 352 insertions(+), 9 deletions(-) create mode 100644 tools/server/tests/unit/test_compat_oai_responses.py diff --git a/requirements/requirements-tool_bench.txt b/requirements/requirements-tool_bench.txt index f7912aff72..3bb74fb9d0 100644 --- a/requirements/requirements-tool_bench.txt +++ b/requirements/requirements-tool_bench.txt @@ -3,7 +3,7 @@ pytest~=8.3.3 huggingface_hub>=0.34.0,<1.0 matplotlib~=3.10.0 numpy~=1.26.4 -openai~=1.55.3 +openai~=2.14.0 pandas~=2.2.3 prometheus-client~=0.20.0 requests~=2.32.3 diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index 4aeeda2ffe..a615760954 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1069,6 +1069,48 @@ json oaicompat_chat_params_parse( return llama_params; } +json convert_responses_to_chatcmpl(const json & body) { + if (!body.contains("input")) { + throw std::invalid_argument("'input' is required"); + } + if (!json_value(body, "previous_response_id", std::string{}).empty()) { + throw std::invalid_argument("llama.cpp does not support 'previous_response_id'."); + } + + const json input_value = body.at("input"); + json chatcmpl_messages = json::array(); + + if (input_value.is_array()) { + chatcmpl_messages = input_value; + } else if (input_value.is_string()) { + chatcmpl_messages.push_back({ + {"role", "user"}, + {"content", input_value}, + }); + } 
else { + std::invalid_argument("'input' must be a string or array of objects"); + } + + const std::string instructions = json_value(body, "instructions", std::string{}); + if (instructions != "") { + chatcmpl_messages.push_back({ + {"role", "system"}, + {"content", instructions}, + }); + } + + json chatcmpl_body = body; + chatcmpl_body.erase("input"); + chatcmpl_body["messages"] = chatcmpl_messages; + + if (body.contains("max_output_tokens")) { + chatcmpl_body.erase("max_output_tokens"); + chatcmpl_body["max_tokens"] = body["max_output_tokens"]; + } + + return chatcmpl_body; +} + json convert_anthropic_to_oai(const json & body) { json oai_body; @@ -1482,6 +1524,24 @@ std::string format_oai_sse(const json & data) { return ss.str(); } +std::string format_oai_resp_sse(const json & data) { + std::ostringstream ss; + auto send_single = [&ss](const json & event_obj) { + ss << "event: " << event_obj.at("event").get() << "\n"; + ss << "data: " << safe_json_to_str(event_obj.at("data")) << "\n\n"; + }; + + if (data.is_array()) { + for (const auto & item : data) { + send_single(item); + } + } else { + send_single(data); + } + + return ss.str(); +} + std::string format_anthropic_sse(const json & data) { std::ostringstream ss; diff --git a/tools/server/server-common.h b/tools/server/server-common.h index a88d40494a..2629a6bee9 100644 --- a/tools/server/server-common.h +++ b/tools/server/server-common.h @@ -294,6 +294,9 @@ json oaicompat_chat_params_parse( const server_chat_params & opt, std::vector & out_files); +// convert OpenAI Responses API format to OpenAI Chat Completions API format +json convert_responses_to_chatcmpl(const json & body); + // convert Anthropic Messages API format to OpenAI Chat Completions API format json convert_anthropic_to_oai(const json & body); @@ -331,6 +334,8 @@ std::string tokens_to_output_formatted_string(const llama_context * ctx, const l // note: if data is a json array, it will be sent as multiple events, one per item std::string 
format_oai_sse(const json & data); +std::string format_oai_resp_sse(const json & data); + // format Anthropic-style SSE with event types std::string format_anthropic_sse(const json & data); diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index f1f677addd..c84e36c4a6 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -3073,6 +3073,58 @@ std::unique_ptr server_routes::handle_completions_impl( json first_result_json = first_result->to_json(); if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) { res->data = format_anthropic_sse(first_result_json); + } else if (res_type == TASK_RESPONSE_TYPE_OAI_RESP) { + const json created = { + {"event", "response.created"}, + {"data", json { + {"type", "response.created"}, + {"response", json { + {"object", "response"}, + {"status", "in_progress"} + }} + }} + }; + const json in_progress = { + {"event", "response.in_progress"}, + {"data", json { + {"type", "response.in_progress"}, + {"response", json { + {"object", "response"}, + {"status", "in_progress"} + }} + }} + }; + const json output_item_added = { + {"event", "response.output_item.added"}, + {"data", json { + {"type", "response.output_item.added"}, + {"item", json { + {"type", "message"}, + {"status", "in_progress"}, + {"content", json::array()}, + {"role", "assistant"} + }} + }} + }; + const json content_part_added = { + {"event", "response.content_part.added"}, + {"data", json { + {"type", "response.content_part.added"}, + {"part", json { + {"type", "output_text"}, + {"text", ""} + }} + }} + }; + + const json initial_events = json::array({ + created, + in_progress, + output_item_added, + content_part_added + }); + + res->data = format_oai_resp_sse(initial_events) + format_oai_resp_sse(first_result_json); } else { res->data = format_oai_sse(first_result_json); } @@ -3107,13 +3159,16 @@ std::unique_ptr server_routes::handle_completions_impl( // check if there is more data if (!rd.has_next()) { - if (res_type == 
TASK_RESPONSE_TYPE_ANTHROPIC) { - // Anthropic doesn't send [DONE], message_stop was already sent - output = ""; - } else if (res_type != TASK_RESPONSE_TYPE_NONE) { - output = "data: [DONE]\n\n"; - } else { - output = ""; + switch (res_type) { + case TASK_RESPONSE_TYPE_NONE: + case TASK_RESPONSE_TYPE_OAI_RESP: + case TASK_RESPONSE_TYPE_ANTHROPIC: + output = ""; + break; + + default: + output = "data: [DONE]\n\n"; + break; } SRV_DBG("%s", "all results received, terminating stream\n"); return false; // no more data, terminate @@ -3141,6 +3196,8 @@ std::unique_ptr server_routes::handle_completions_impl( json res_json = result->to_json(); if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) { output = format_anthropic_sse(res_json); + } else if (res_type == TASK_RESPONSE_TYPE_OAI_RESP) { + output = format_oai_resp_sse(res_json); } else { output = format_oai_sse(res_json); } @@ -3575,6 +3632,22 @@ void server_routes::init_routes() { TASK_RESPONSE_TYPE_OAI_CHAT); }; + this->post_responses_oai = [this](const server_http_req & req) { + auto res = create_response(); + std::vector files; + json body = convert_responses_to_chatcmpl(json::parse(req.body)); + json body_parsed = oaicompat_chat_params_parse( + body, + ctx_server.oai_parser_opt, + files); + return handle_completions_impl( + req, + SERVER_TASK_TYPE_COMPLETION, + body_parsed, + files, + TASK_RESPONSE_TYPE_OAI_RESP); + }; + this->post_anthropic_messages = [this](const server_http_req & req) { auto res = create_response(); std::vector files; diff --git a/tools/server/server-context.h b/tools/server/server-context.h index ec1df96950..3e5e870fc5 100644 --- a/tools/server/server-context.h +++ b/tools/server/server-context.h @@ -94,6 +94,7 @@ struct server_routes { server_http_context::handler_t post_completions; server_http_context::handler_t post_completions_oai; server_http_context::handler_t post_chat_completions; + server_http_context::handler_t post_responses_oai; server_http_context::handler_t post_anthropic_messages; 
server_http_context::handler_t post_anthropic_count_tokens; server_http_context::handler_t post_apply_template; diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index 2add9667d1..03f63f958d 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -584,6 +584,8 @@ json server_task_result_cmpl_final::to_json() { return to_json_oaicompat(); case TASK_RESPONSE_TYPE_OAI_CHAT: return stream ? to_json_oaicompat_chat_stream() : to_json_oaicompat_chat(); + case TASK_RESPONSE_TYPE_OAI_RESP: + return stream ? to_json_oaicompat_resp_stream() : to_json_oaicompat_resp(); case TASK_RESPONSE_TYPE_ANTHROPIC: return stream ? to_json_anthropic_stream() : to_json_anthropic(); default: @@ -801,6 +803,122 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() { return deltas; } +json server_task_result_cmpl_final::to_json_oaicompat_resp() { + common_chat_msg msg; + if (!oaicompat_msg.empty()) { + msg = oaicompat_msg; + } else { + msg.role = "assistant"; + msg.content = content; + } + + const json reasoning = { + {"type", "reasoning"}, + {"summary", json::array({json { + {"type", "summary_text"}, + {"text", msg.reasoning_content} + }})} + }; + const json message = { + {"type", "message"}, + {"status", "completed"}, + {"content", json::array({json { + {"type", "output_text"}, + {"annotations", json::array()}, + {"logprobs", json::array()}, + {"text", msg.content} + }})}, + {"role", msg.role} + }; + + std::time_t t = std::time(0); + json res = { + {"object", "response"}, + {"created_at", t}, + {"status", "completed"}, + {"model", oaicompat_model}, + {"output", json::array({reasoning, message})}, + {"usage", json { + {"input_tokens", n_prompt_tokens}, + {"output_tokens", n_decoded}, + {"total_tokens", n_decoded + n_prompt_tokens} + }}, + }; + + if (verbose) { + res["__verbose"] = to_json_non_oaicompat(); + } + if (timings.prompt_n >= 0) { + res.push_back({"timings", timings.to_json()}); + } + + return res; +} + +json 
server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { + json server_sent_events = json::array(); + + server_sent_events.push_back(json { + {"event", "response.output_text.done"}, + {"data", json { + {"type", "response.output_text.done"}, + {"text", oaicompat_msg.content} + }} + }); + + const json part = { + {"type", "output_text"}, + {"annotations", json::array()}, + {"logprobs", json::array()}, + {"text", oaicompat_msg.content} + }; + + server_sent_events.push_back(json { + {"event", "response.content_part.done"}, + {"data", json { + {"type", "response.content_part.done"}, + {"part", part} + }} + }); + + const json item = { + {"type", "message"}, + {"status", "completed"}, + {"content", json::array({part})}, + {"role", "assistant"} + }; + + server_sent_events.push_back(json { + {"event", "response.output_item.done"}, + {"data", json { + {"type", "response.output_item.done"}, + {"item", item} + }} + }); + + std::time_t t = std::time(0); + server_sent_events.push_back(json { + {"event", "response.completed"}, + {"data", json { + {"type", "response.completed"}, + {"response", json { + {"object", "response"}, + {"created_at", t}, + {"status", "completed"}, + {"model", oaicompat_model}, + {"output", json::array({item})}, + {"usage", json { + {"input_tokens", n_prompt_tokens}, + {"output_tokens", n_decoded}, + {"total_tokens", n_decoded + n_prompt_tokens} + }} + }}, + }} + }); + + return server_sent_events; +} + json server_task_result_cmpl_final::to_json_anthropic() { std::string stop_reason = "max_tokens"; if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) { @@ -1066,6 +1184,8 @@ json server_task_result_cmpl_partial::to_json() { return to_json_oaicompat(); case TASK_RESPONSE_TYPE_OAI_CHAT: return to_json_oaicompat_chat(); + case TASK_RESPONSE_TYPE_OAI_RESP: + return to_json_oaicompat_resp(); case TASK_RESPONSE_TYPE_ANTHROPIC: return to_json_anthropic(); default: @@ -1135,6 +1255,33 @@ json server_task_result_cmpl_partial::to_json_oaicompat() { return res; 
} +json server_task_result_cmpl_partial::to_json_oaicompat_resp() { + std::vector deltas; + + for (const common_chat_msg_diff & diff : oaicompat_msg_diffs) { + if (!diff.reasoning_content_delta.empty()) { + deltas.push_back(json { + {"event", "response.reasoning_text.delta"}, + {"data", json { + {"type", "response.reasoning_text.delta"}, + {"delta", diff.reasoning_content_delta} + }} + }); + } + if (!diff.content_delta.empty()) { + deltas.push_back(json { + {"event", "response.output_text.delta"}, + {"data", json { + {"type", "response.output_text.delta"}, + {"delta", diff.content_delta} + }} + }); + } + } + + return deltas; +} + json server_task_result_cmpl_partial::to_json_oaicompat_chat() { bool first = n_decoded == 1; std::time_t t = std::time(0); diff --git a/tools/server/server-task.h b/tools/server/server-task.h index 6835eef507..5c71bb6d21 100644 --- a/tools/server/server-task.h +++ b/tools/server/server-task.h @@ -33,6 +33,7 @@ enum task_response_type { TASK_RESPONSE_TYPE_NONE, // llama.cpp native format TASK_RESPONSE_TYPE_OAI_CHAT, TASK_RESPONSE_TYPE_OAI_CMPL, + TASK_RESPONSE_TYPE_OAI_RESP, TASK_RESPONSE_TYPE_OAI_EMBD, TASK_RESPONSE_TYPE_ANTHROPIC, }; @@ -371,6 +372,10 @@ struct server_task_result_cmpl_final : server_task_result { json to_json_oaicompat_chat_stream(); + json to_json_oaicompat_resp(); + + json to_json_oaicompat_resp_stream(); + json to_json_anthropic(); json to_json_anthropic_stream(); @@ -436,6 +441,8 @@ struct server_task_result_cmpl_partial : server_task_result { json to_json_oaicompat_chat(); + json to_json_oaicompat_resp(); + json to_json_anthropic(); }; diff --git a/tools/server/server.cpp b/tools/server/server.cpp index 1d9abf6055..d3d4316026 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -140,6 +140,7 @@ int main(int argc, char ** argv) { routes.post_completions = models_routes->proxy_post; routes.post_completions_oai = models_routes->proxy_post; routes.post_chat_completions = models_routes->proxy_post; + 
routes.post_responses_oai = models_routes->proxy_post; routes.post_anthropic_messages = models_routes->proxy_post; routes.post_anthropic_count_tokens = models_routes->proxy_post; routes.post_infill = models_routes->proxy_post; @@ -176,6 +177,7 @@ int main(int argc, char ** argv) { ctx_http.post("/chat/completions", ex_wrapper(routes.post_chat_completions)); ctx_http.post("/v1/chat/completions", ex_wrapper(routes.post_chat_completions)); ctx_http.post("/api/chat", ex_wrapper(routes.post_chat_completions)); // ollama specific endpoint + ctx_http.post("/v1/responses", ex_wrapper(routes.post_responses_oai)); ctx_http.post("/v1/messages", ex_wrapper(routes.post_anthropic_messages)); // anthropic messages API ctx_http.post("/v1/messages/count_tokens", ex_wrapper(routes.post_anthropic_count_tokens)); // anthropic token counting ctx_http.post("/infill", ex_wrapper(routes.post_infill)); diff --git a/tools/server/tests/requirements.txt b/tools/server/tests/requirements.txt index 4ea7f19f77..ca79d025ed 100644 --- a/tools/server/tests/requirements.txt +++ b/tools/server/tests/requirements.txt @@ -2,7 +2,7 @@ aiohttp~=3.9.3 pytest~=8.3.3 huggingface_hub>=0.34.0,<1.0 numpy~=1.26.4 -openai~=1.55.3 +openai~=2.14.0 prometheus-client~=0.20.0 requests~=2.32.3 wget~=3.2 diff --git a/tools/server/tests/unit/test_compat_oai_responses.py b/tools/server/tests/unit/test_compat_oai_responses.py new file mode 100644 index 0000000000..e168f4562d --- /dev/null +++ b/tools/server/tests/unit/test_compat_oai_responses.py @@ -0,0 +1,48 @@ +import pytest +from openai import OpenAI +from utils import * + +server: ServerProcess + +@pytest.fixture(autouse=True) +def create_server(): + global server + server = ServerPreset.tinyllama2() + +def test_responses_with_openai_library(): + global server + server.start() + client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}/v1") + res = client.responses.create( + model="gpt-4.1", + input=[ + {"role": "system", "content": 
"Book"}, + {"role": "user", "content": "What is the best book"}, + ], + max_output_tokens=8, + temperature=0.8, + ) + assert match_regex("(Suddenly)+", res.output_text) + +def test_responses_stream_with_openai_library(): + global server + server.start() + client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}/v1") + stream = client.responses.create( + model="gpt-4.1", + input=[ + {"role": "system", "content": "Book"}, + {"role": "user", "content": "What is the best book"}, + ], + max_output_tokens=8, + temperature=0.8, + stream=True, + ) + + gathered_text = '' + for r in stream: + if r.type == "response.output_text.delta": + gathered_text += r.delta + if r.type == "response.completed": + assert gathered_text == r.response.output_text + assert match_regex("(Suddenly)+", r.response.output_text) From c41a6d7dd30eaa8aca7e6e9039af83eb18510484 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Tue, 30 Dec 2025 05:44:41 +0000 Subject: [PATCH 02/23] Make instruction(system) as first message --- tools/server/server-common.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index a615760954..e1f28f35ed 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1080,23 +1080,23 @@ json convert_responses_to_chatcmpl(const json & body) { const json input_value = body.at("input"); json chatcmpl_messages = json::array(); - if (input_value.is_array()) { - chatcmpl_messages = input_value; - } else if (input_value.is_string()) { + const std::string instructions = json_value(body, "instructions", std::string()); + if (instructions != "") { chatcmpl_messages.push_back({ - {"role", "user"}, - {"content", input_value}, + {"role", "system"}, + {"content", instructions}, }); - } else { - std::invalid_argument("'input' must be a string or array of objects"); } - const std::string instructions = json_value(body, 
"instructions", std::string{}); - if (instructions != "") { + if (input_value.is_string()) { chatcmpl_messages.push_back({ - {"role", "system"}, - {"content", instructions}, + {"role", "user"}, + {"content", input_value}, }); + } else if (input_value.is_array()) { + chatcmpl_messages = input_value; + } else { + throw std::invalid_argument("'input' must be a string or array of objects"); } json chatcmpl_body = body; From aa2238ea55c028817241a345bbc7bdf923287832 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Tue, 30 Dec 2025 05:45:32 +0000 Subject: [PATCH 03/23] Convert [input_message] (text/image/file) --- tools/server/server-common.cpp | 70 +++++++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index e1f28f35ed..373e84e36a 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1094,7 +1094,75 @@ json convert_responses_to_chatcmpl(const json & body) { {"content", input_value}, }); } else if (input_value.is_array()) { - chatcmpl_messages = input_value; + for (const auto & input_message : input_value) { + if (!input_message.contains("content")) { + throw std::invalid_argument("'content' is required"); + } + const json content = input_message.at("content"); + + if (content.is_string()) { + chatcmpl_messages.push_back(input_message); + } else if (content.is_array()) { + json new_content = json::array(); + + for (const auto & input_item : content) { + const std::string type = json_value(input_item, "type", std::string()); + + if (type == "input_text") { + if (!input_item.contains("text")) { + throw std::invalid_argument("'Input text' requires 'text'"); + } + new_content.push_back({ + {"text", input_item.at("text")}, + {"type", "text"} + }); + } else if (type == "input_image") { + // While `detail` is marked as required, + // it has default value("auto") and can be omitted. 
+ + if (!input_item.contains("image_url")) { + throw std::invalid_argument("'image_url' is required"); + } + new_content.push_back({ + {"image_url", json {{"url", input_item.at("image_url")}}}, + {"type", "image_url"} + }); + } else if (type == "input_file") { + if (input_item.contains("file_url")) { + // chat completion API does not support file_url + throw std::invalid_argument("'file_url' is not supported"); + } + if (!input_item.contains("file_data") || !input_item.contains("filename")) { + throw std::invalid_argument("Both 'file_data' and 'filename' are required"); + } + new_content.push_back({ + {"file", json { + {"file_data", input_item.at("file_data")}, + {"filename", input_item.at("filename")}}}, + {"type", "file"} + }); + } else { + throw std::invalid_argument("'type' must be one of 'input_text', 'input_image', or 'input_file'"); + } + } + + json new_input_message = input_message; + new_input_message["content"] = new_content; + + chatcmpl_messages.push_back(new_input_message); + } else { + throw std::invalid_argument("'content' must be a string or array of objects"); + } + + const std::string role = json_value(input_message, "role", std::string()); + if (role != "user" && role != "assistant" && role != "system" && role != "developer") { + throw std::invalid_argument("'role' must be one of user, assistant, system, or developer"); + } + + if (input_message.contains("type") && input_message.at("type") != "message") { + throw std::invalid_argument("If 'type' is defined, it should be 'message'"); + } + } } else { throw std::invalid_argument("'input' must be a string or array of objects"); } From fd0a13bb7507c058132a4b972f525cd0a0f49b6f Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Tue, 30 Dec 2025 06:29:05 +0000 Subject: [PATCH 04/23] Rename convert_responses_to_chatcmpl(body) -> response_body --- tools/server/server-common.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/server/server-common.cpp 
b/tools/server/server-common.cpp index 373e84e36a..18d79a1fe0 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1069,18 +1069,18 @@ json oaicompat_chat_params_parse( return llama_params; } -json convert_responses_to_chatcmpl(const json & body) { - if (!body.contains("input")) { +json convert_responses_to_chatcmpl(const json & response_body) { + if (!response_body.contains("input")) { throw std::invalid_argument("'input' is required"); } - if (!json_value(body, "previous_response_id", std::string{}).empty()) { + if (!json_value(response_body, "previous_response_id", std::string{}).empty()) { throw std::invalid_argument("llama.cpp does not support 'previous_response_id'."); } - const json input_value = body.at("input"); + const json input_value = response_body.at("input"); json chatcmpl_messages = json::array(); - const std::string instructions = json_value(body, "instructions", std::string()); + const std::string instructions = json_value(response_body, "instructions", std::string()); if (instructions != "") { chatcmpl_messages.push_back({ {"role", "system"}, @@ -1167,13 +1167,13 @@ json convert_responses_to_chatcmpl(const json & body) { throw std::invalid_argument("'input' must be a string or array of objects"); } - json chatcmpl_body = body; + json chatcmpl_body = response_body; chatcmpl_body.erase("input"); chatcmpl_body["messages"] = chatcmpl_messages; - if (body.contains("max_output_tokens")) { + if (response_body.contains("max_output_tokens")) { chatcmpl_body.erase("max_output_tokens"); - chatcmpl_body["max_tokens"] = body["max_output_tokens"]; + chatcmpl_body["max_tokens"] = response_body["max_output_tokens"]; } return chatcmpl_body; From f4a87c01b885c57412ef92b66b028839e3d87dc0 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Thu, 1 Jan 2026 00:26:37 +0000 Subject: [PATCH 05/23] Initial tool call support --- tools/server/server-common.cpp | 168 ++++++++++++++++++++++++++++----- tools/server/server-task.cpp | 120 
++++++++++++++++------- 2 files changed, 228 insertions(+), 60 deletions(-) diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index 18d79a1fe0..0cb1b620a8 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1089,30 +1089,52 @@ json convert_responses_to_chatcmpl(const json & response_body) { } if (input_value.is_string()) { + // #responses_create-input-text_input chatcmpl_messages.push_back({ {"role", "user"}, {"content", input_value}, }); } else if (input_value.is_array()) { - for (const auto & input_message : input_value) { - if (!input_message.contains("content")) { - throw std::invalid_argument("'content' is required"); + // #responses_create-input-input_item_list + + const auto exists_and_is_array = [](const json & j, const char * key) -> bool { + return j.contains(key) && j.at(key).is_array(); + }; + const auto exists_and_is_string = [](const json & j, const char * key) -> bool { + return j.contains(key) && j.at(key).is_string(); + }; + + for (json item : input_value) { + if (exists_and_is_string(item, "content")) { + // #responses_create-input-input_item_list-input_message-content-text_input + // Only "Input message" contains item["content"]::string + // After converting item["content"]::string to item["content"]::array, + // we can treat "Input message" as sum of "Item-Input message" and "Item-Output message" + item["content"] = json::array({ + json { + {"text", item.at("content")}, + {"type", "input_text"} + } + }); } - const json content = input_message.at("content"); - if (content.is_string()) { - chatcmpl_messages.push_back(input_message); - } else if (content.is_array()) { - json new_content = json::array(); + if (exists_and_is_array(item, "content") && + exists_and_is_string(item, "role") && + (item.at("role") == "user" || + item.at("role") == "system" || + item.at("role") == "developer") + ) { + // #responses_create-input-input_item_list-item-input_message + json chatcmpl_content = 
json::array(); - for (const auto & input_item : content) { + for (const json & input_item : item.at("content")) { const std::string type = json_value(input_item, "type", std::string()); if (type == "input_text") { if (!input_item.contains("text")) { throw std::invalid_argument("'Input text' requires 'text'"); } - new_content.push_back({ + chatcmpl_content.push_back({ {"text", input_item.at("text")}, {"type", "text"} }); @@ -1123,7 +1145,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { if (!input_item.contains("image_url")) { throw std::invalid_argument("'image_url' is required"); } - new_content.push_back({ + chatcmpl_content.push_back({ {"image_url", json {{"url", input_item.at("image_url")}}}, {"type", "image_url"} }); @@ -1135,7 +1157,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { if (!input_item.contains("file_data") || !input_item.contains("filename")) { throw std::invalid_argument("Both 'file_data' and 'filename' are required"); } - new_content.push_back({ + chatcmpl_content.push_back({ {"file", json { {"file_data", input_item.at("file_data")}, {"filename", input_item.at("filename")}}}, @@ -1146,21 +1168,93 @@ json convert_responses_to_chatcmpl(const json & response_body) { } } - json new_input_message = input_message; - new_input_message["content"] = new_content; + if (item.contains("type")) { + item.erase("type"); + } + if (item.contains("status")) { + item.erase("status"); + } + item["content"] = chatcmpl_content; + + chatcmpl_messages.push_back(item); + } else if (exists_and_is_array(item, "content") && + exists_and_is_string(item, "role") && + item.at("role") == "assistant" && + exists_and_is_string(item, "status") && + (item.at("status") == "in_progress" || + item.at("status") == "completed" || + item.at("status") == "incomplete") && + exists_and_is_string(item, "type") && + item.at("type") == "message" + ) { + // #responses_create-input-input_item_list-item-output_message + json chatcmpl_content = 
json::array(); + + for (const auto & output_text : item.at("content")) { + const std::string type = json_value(output_text, "type", std::string()); + if (type != "output_text") { + throw std::invalid_argument("'type' must be 'output_text'"); + } + if (!exists_and_is_string(output_text, "text")) { + throw std::invalid_argument("'Output text' requires 'text'"); + } + // Ignore annotations and logprobs for now + chatcmpl_content.push_back({ + {"text", output_text.at("text")}, + {"type", "text"} + }); + } - chatcmpl_messages.push_back(new_input_message); + item.erase("status"); + item.erase("type"); + item["content"] = chatcmpl_content; + chatcmpl_messages.push_back(item); + } else if (exists_and_is_string(item, "arguments") && + exists_and_is_string(item, "call_id") && + exists_and_is_string(item, "name") && + exists_and_is_string(item, "type") && + item.at("type") == "function_call" + ) { + // #responses_create-input-input_item_list-item-function_tool_call + chatcmpl_messages.push_back(json { + {"role", "assistant"}, + {"tool_calls", json::array({ json { + {"function", json { + {"arguments", item.at("arguments")}, + {"name", item.at("name")} + }}, + {"id", item.at("call_id")}, + {"type", "function"} + }})}, + }); + } else if (exists_and_is_string(item, "call_id") && + (exists_and_is_string(item, "output") || exists_and_is_array(item, "output")) && + exists_and_is_string(item, "type") && + item.at("type") == "function_call_output" + ) { + // #responses_create-input-input_item_list-item-function_tool_call_output + if (item.at("output").is_string()) { + chatcmpl_messages.push_back(json { + {"content", item.at("output")}, + {"role", "tool"}, + {"tool_call_id", item.at("call_id")} + }); + } else { + json chatcmpl_outputs = item.at("output"); + for (json & chatcmpl_output : chatcmpl_outputs) { + if (!chatcmpl_output.contains("type") || chatcmpl_output.at("type") != "input_text") { + throw std::invalid_argument("Output of tool call should be 'Input text'"); + } + 
chatcmpl_output["type"] = "text"; + } + chatcmpl_messages.push_back(json { + {"content", chatcmpl_outputs}, + {"role", "tool"}, + {"tool_call_id", item.at("call_id")} + }); + } } else { - throw std::invalid_argument("'content' must be a string or array of objects"); - } - - const std::string role = json_value(input_message, "role", std::string()); - if (role != "user" && role != "assistant" && role != "system" && role != "developer") { - throw std::invalid_argument("'role' must be one of user, assistant, system, or developer"); - } - - if (input_message.contains("type") && input_message.at("type") != "message") { - throw std::invalid_argument("If 'type' is defined, it should be 'message'"); + throw std::invalid_argument("Cannot determine type of 'item'"); } } } else { @@ -1171,6 +1265,30 @@ json convert_responses_to_chatcmpl(const json & response_body) { chatcmpl_body.erase("input"); chatcmpl_body["messages"] = chatcmpl_messages; + if (response_body.contains("tools")) { + if (!response_body.at("tools").is_array()) { + throw std::invalid_argument("'tools' must be an array of objects"); + } + json chatcmpl_tools = json::array(); + for (json resp_tool : response_body.at("tools")) { + json chatcmpl_tool; + + if (json_value(resp_tool, "type", std::string()) != "function") { + throw std::invalid_argument("'type' of tool must be 'function'"); + } + resp_tool.erase("type"); + chatcmpl_tool["type"] = "function"; + + if (!resp_tool.contains("strict")) { + resp_tool["strict"] = true; + } + chatcmpl_tool["function"] = resp_tool; + chatcmpl_tools.push_back(chatcmpl_tool); + } + chatcmpl_body.erase("tools"); + chatcmpl_body["tools"] = chatcmpl_tools; + } + if (response_body.contains("max_output_tokens")) { chatcmpl_body.erase("max_output_tokens"); chatcmpl_body["max_tokens"] = response_body["max_output_tokens"]; diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index 03f63f958d..e78086e0aa 100644 --- a/tools/server/server-task.cpp +++ 
b/tools/server/server-task.cpp @@ -857,44 +857,69 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp() { json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { json server_sent_events = json::array(); + json output = json::array(); + + for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) { + server_sent_events.push_back(json { + {"event", "response.output_item.done"}, + {"data", json { + {"type", "response.output_item.done"}, + {"item", json { + {"type", "function_call"}, + {"status", "completed"}, + {"arguments", tool_call.arguments}, + {"call_id", "call_dummy_id"}, + {"name", tool_call.name} + }} + }} + }); + output.push_back({ + {"type", "function_call"}, + {"status", "completed"}, + {"arguments", tool_call.arguments}, + {"name", tool_call.name} + }); + } - server_sent_events.push_back(json { - {"event", "response.output_text.done"}, - {"data", json { - {"type", "response.output_text.done"}, - {"text", oaicompat_msg.content} - }} - }); - - const json part = { - {"type", "output_text"}, - {"annotations", json::array()}, - {"logprobs", json::array()}, - {"text", oaicompat_msg.content} - }; + if (oaicompat_msg.content != "") { + server_sent_events.push_back(json { + {"event", "response.output_text.done"}, + {"data", json { + {"type", "response.output_text.done"}, + {"text", oaicompat_msg.content} + }} + }); - server_sent_events.push_back(json { - {"event", "response.content_part.done"}, - {"data", json { - {"type", "response.content_part.done"}, - {"part", part} - }} - }); + const json part = { + {"type", "output_text"}, + {"annotations", json::array()}, + {"logprobs", json::array()}, + {"text", oaicompat_msg.content} + }; - const json item = { - {"type", "message"}, - {"status", "completed"}, - {"content", json::array({part})}, - {"role", "assistant"} - }; + server_sent_events.push_back(json { + {"event", "response.content_part.done"}, + {"data", json { + {"type", "response.content_part.done"}, + {"part", part} + }} + 
}); + const json item = { + {"type", "message"}, + {"status", "completed"}, + {"content", json::array({part})}, + {"role", "assistant"} + }; - server_sent_events.push_back(json { - {"event", "response.output_item.done"}, - {"data", json { - {"type", "response.output_item.done"}, - {"item", item} - }} - }); + server_sent_events.push_back(json { + {"event", "response.output_item.done"}, + {"data", json { + {"type", "response.output_item.done"}, + {"item", item} + }} + }); + output.push_back(item); + } std::time_t t = std::time(0); server_sent_events.push_back(json { @@ -902,11 +927,12 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { {"data", json { {"type", "response.completed"}, {"response", json { + {"id", "resp_dummy_id"}, {"object", "response"}, {"created_at", t}, {"status", "completed"}, {"model", oaicompat_model}, - {"output", json::array({item})}, + {"output", output}, {"usage", json { {"input_tokens", n_prompt_tokens}, {"output_tokens", n_decoded}, @@ -1268,6 +1294,30 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() { }} }); } + if (!diff.tool_call_delta.name.empty()) { + deltas.push_back(json { + {"event", "response.output_item.added"}, + {"data", json { + {"type", "response.output_item.added"}, + {"item", json { + {"arguments", ""}, + {"call_id", "call_id_dummy"}, + {"name", diff.tool_call_delta.name}, + {"type", "function_call"}, + {"status", "in_progress"} + }} + }} + }); + } + if (!diff.tool_call_delta.arguments.empty()) { + deltas.push_back(json { + {"event", "response.function_call_arguments.delta"}, + {"data", json { + {"type", "response.function_call_arguments.delta"}, + {"delta", diff.tool_call_delta.arguments} + }} + }); + } if (!diff.content_delta.empty()) { deltas.push_back(json { {"event", "response.output_text.delta"}, From 6e47dea6cb418289d3691f10fbcdd344af9947ec Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Fri, 9 Jan 2026 08:00:21 +0000 Subject: [PATCH 06/23] Erase instructions field from 
chatcmpl body --- tools/server/server-common.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index 0cb1b620a8..a2d1d03526 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1078,6 +1078,8 @@ json convert_responses_to_chatcmpl(const json & response_body) { } const json input_value = response_body.at("input"); + json chatcmpl_body = response_body; + chatcmpl_body.erase("input"); json chatcmpl_messages = json::array(); const std::string instructions = json_value(response_body, "instructions", std::string()); @@ -1086,6 +1088,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { {"role", "system"}, {"content", instructions}, }); + chatcmpl_body.erase("instructions"); } if (input_value.is_string()) { @@ -1261,8 +1264,6 @@ json convert_responses_to_chatcmpl(const json & response_body) { throw std::invalid_argument("'input' must be a string or array of objects"); } - json chatcmpl_body = response_body; - chatcmpl_body.erase("input"); chatcmpl_body["messages"] = chatcmpl_messages; if (response_body.contains("tools")) { From 313ea1e871e9307b92bb18869a82475d519ae064 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Fri, 16 Jan 2026 11:19:44 +0000 Subject: [PATCH 07/23] Feed reasoning texts to chat template --- tools/server/server-common.cpp | 49 +++++++++++++++++++++++++++++----- tools/server/server-task.cpp | 43 ++++++++++++++++++++--------- 2 files changed, 73 insertions(+), 19 deletions(-) diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index a2d1d03526..362c312d4c 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1080,7 +1080,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { const json input_value = response_body.at("input"); json chatcmpl_body = response_body; chatcmpl_body.erase("input"); - json chatcmpl_messages = json::array(); + std::vector 
chatcmpl_messages; const std::string instructions = json_value(response_body, "instructions", std::string()); if (instructions != "") { @@ -1183,10 +1183,11 @@ json convert_responses_to_chatcmpl(const json & response_body) { } else if (exists_and_is_array(item, "content") && exists_and_is_string(item, "role") && item.at("role") == "assistant" && - exists_and_is_string(item, "status") && - (item.at("status") == "in_progress" || - item.at("status") == "completed" || - item.at("status") == "incomplete") && + // exists_and_is_string(item, "status") && + // (item.at("status") == "in_progress" || + // item.at("status") == "completed" || + // item.at("status") == "incomplete") && + // item["status"] not sent by codex-cli exists_and_is_string(item, "type") && item.at("type") == "message" ) { @@ -1219,7 +1220,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { item.at("type") == "function_call" ) { // #responses_create-input-input_item_list-item-function_tool_call - chatcmpl_messages.push_back(json { + json msg = json { {"role", "assistant"}, {"tool_calls", json::array({ json { {"function", json { @@ -1229,7 +1230,14 @@ json convert_responses_to_chatcmpl(const json & response_body) { {"id", item.at("call_id")}, {"type", "function"} }})}, - }); + }; + + if (!chatcmpl_messages.empty() && chatcmpl_messages.back().contains("reasoning_content")) { + // Move reasoning content from dummy message to tool call message + msg["reasoning_content"] = chatcmpl_messages.back().at("reasoning_content"); + chatcmpl_messages.pop_back(); + } + chatcmpl_messages.push_back(msg); } else if (exists_and_is_string(item, "call_id") && (exists_and_is_string(item, "output") || exists_and_is_array(item, "output")) && exists_and_is_string(item, "type") && @@ -1256,6 +1264,19 @@ json convert_responses_to_chatcmpl(const json & response_body) { {"tool_call_id", item.at("call_id")} }); } + } else if (// exists_and_is_string(item, "id") && + // item["id"] not sent by codex-cli + 
exists_and_is_array(item, "summary") && + exists_and_is_string(item, "type") && + item.at("type") == "reasoning") { + // #responses_create-input-input_item_list-item-reasoning + + // Pack reasoning content in dummy message + chatcmpl_messages.push_back(json { + {"role", "assistant"}, + {"content", json::array()}, + {"reasoning_content", item.at("content")[0].at("text")} + }); } else { throw std::invalid_argument("Cannot determine type of 'item'"); } @@ -1264,6 +1285,20 @@ json convert_responses_to_chatcmpl(const json & response_body) { throw std::invalid_argument("'input' must be a string or array of objects"); } + // Remove unused dummy message + // (reasoning content not followed by tool calls) + chatcmpl_messages.erase(std::remove_if( + chatcmpl_messages.begin(), + chatcmpl_messages.end(), + [](const json & x){ return x.contains("role") && + x.at("role") == "assistant" && + x.contains("content") && + x.at("content") == json::array() && + x.contains("reasoning_content"); + }), + chatcmpl_messages.end() + ); + chatcmpl_body["messages"] = chatcmpl_messages; if (response_body.contains("tools")) { diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index e78086e0aa..b372187f13 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -859,26 +859,23 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { json server_sent_events = json::array(); json output = json::array(); - for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) { + if (oaicompat_msg.reasoning_content != "") { server_sent_events.push_back(json { {"event", "response.output_item.done"}, {"data", json { {"type", "response.output_item.done"}, {"item", json { - {"type", "function_call"}, - {"status", "completed"}, - {"arguments", tool_call.arguments}, - {"call_id", "call_dummy_id"}, - {"name", tool_call.name} + {"id", "rs_id(response.output_item.done)"}, + {"summary", json::array()}, + {"type", "reasoning"}, + {"content", 
json::array({json { + {"text", oaicompat_msg.reasoning_content}, + {"type", "reasoning_text"}, + }})}, + {"encrypted_content", ""}, }} }} }); - output.push_back({ - {"type", "function_call"}, - {"status", "completed"}, - {"arguments", tool_call.arguments}, - {"name", tool_call.name} - }); } if (oaicompat_msg.content != "") { @@ -921,6 +918,28 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { output.push_back(item); } + for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) { + server_sent_events.push_back(json { + {"event", "response.output_item.done"}, + {"data", json { + {"type", "response.output_item.done"}, + {"item", json { + {"type", "function_call"}, + {"status", "completed"}, + {"arguments", tool_call.arguments}, + {"call_id", "call_dummy_id"}, + {"name", tool_call.name} + }} + }} + }); + output.push_back({ + {"type", "function_call"}, + {"status", "completed"}, + {"arguments", tool_call.arguments}, + {"name", tool_call.name} + }); + } + std::time_t t = std::time(0); server_sent_events.push_back(json { {"event", "response.completed"}, From 7d7058bbcbc53c65d7e58706f64e782221cf16ca Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Fri, 16 Jan 2026 11:33:37 +0000 Subject: [PATCH 08/23] Use std::vector instead of opaque json array --- tools/server/server-common.cpp | 6 +++--- tools/server/server-task.cpp | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index 362c312d4c..5aff08f0c1 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1128,7 +1128,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { item.at("role") == "developer") ) { // #responses_create-input-input_item_list-item-input_message - json chatcmpl_content = json::array(); + std::vector chatcmpl_content; for (const json & input_item : item.at("content")) { const std::string type = json_value(input_item, "type", std::string()); 
@@ -1192,7 +1192,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { item.at("type") == "message" ) { // #responses_create-input-input_item_list-item-output_message - json chatcmpl_content = json::array(); + std::vector chatcmpl_content; for (const auto & output_text : item.at("content")) { const std::string type = json_value(output_text, "type", std::string()); @@ -1305,7 +1305,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { if (!response_body.at("tools").is_array()) { throw std::invalid_argument("'tools' must be an array of objects"); } - json chatcmpl_tools = json::array(); + std::vector chatcmpl_tools; for (json resp_tool : response_body.at("tools")) { json chatcmpl_tool; diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index b372187f13..47bd1d8a6e 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -856,8 +856,8 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp() { } json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { - json server_sent_events = json::array(); - json output = json::array(); + std::vector server_sent_events; + std::vector output; if (oaicompat_msg.reasoning_content != "") { server_sent_events.push_back(json { From e550290deda2586babade0d0b9d28fb07ba3aa06 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Sat, 17 Jan 2026 01:44:18 +0000 Subject: [PATCH 09/23] Make output_item.added events consistent --- tools/server/server-context.cpp | 52 +---------- tools/server/server-task.cpp | 53 +---------- tools/server/server-task.h | 157 +++++++++++++++++++++++++++++++- 3 files changed, 157 insertions(+), 105 deletions(-) diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index c84e36c4a6..3e3b230591 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -3074,57 +3074,7 @@ std::unique_ptr server_routes::handle_completions_impl( if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) { 
res->data = format_anthropic_sse(first_result_json); } else if (res_type == TASK_RESPONSE_TYPE_OAI_RESP) { - const json created = { - {"event", "response.created"}, - {"data", json { - {"type", "response.created"}, - {"response", json { - {"object", "response"}, - {"status", "in_progress"} - }} - }} - }; - const json in_progress = { - {"event", "response.in_progress"}, - {"data", json { - {"type", "response.in_progress"}, - {"response", json { - {"object", "response"}, - {"status", "in_progress"} - }} - }} - }; - const json output_item_added = { - {"event", "response.output_item.added"}, - {"data", json { - {"type", "response.output_item.added"}, - {"item", json { - {"type", "message"}, - {"status", "in_progress"}, - {"content", json::array()}, - {"role", "assistant"} - }} - }} - }; - const json content_part_added = { - {"event", "response.content_part.added"}, - {"data", json { - {"type", "response.content_part.added"}, - {"part", json { - {"type", "output_text"}, - {"text", ""} - }} - }} - }; - - const json initial_events = json::array({ - created, - in_progress, - output_item_added, - content_part_added - }); - - res->data = format_oai_resp_sse(initial_events) + format_oai_resp_sse(first_result_json); + res->data = format_oai_resp_sse(first_result_json); } else { res->data = format_oai_sse(first_result_json); } diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index 47bd1d8a6e..bf986cfe8c 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -1230,7 +1230,7 @@ json server_task_result_cmpl_partial::to_json() { case TASK_RESPONSE_TYPE_OAI_CHAT: return to_json_oaicompat_chat(); case TASK_RESPONSE_TYPE_OAI_RESP: - return to_json_oaicompat_resp(); + return openai_responses_current_events; case TASK_RESPONSE_TYPE_ANTHROPIC: return to_json_anthropic(); default: @@ -1300,57 +1300,6 @@ json server_task_result_cmpl_partial::to_json_oaicompat() { return res; } -json server_task_result_cmpl_partial::to_json_oaicompat_resp() { 
- std::vector deltas; - - for (const common_chat_msg_diff & diff : oaicompat_msg_diffs) { - if (!diff.reasoning_content_delta.empty()) { - deltas.push_back(json { - {"event", "response.reasoning_text.delta"}, - {"data", json { - {"type", "response.reasoning_text.delta"}, - {"delta", diff.reasoning_content_delta} - }} - }); - } - if (!diff.tool_call_delta.name.empty()) { - deltas.push_back(json { - {"event", "response.output_item.added"}, - {"data", json { - {"type", "response.output_item.added"}, - {"item", json { - {"arguments", ""}, - {"call_id", "call_id_dummy"}, - {"name", diff.tool_call_delta.name}, - {"type", "function_call"}, - {"status", "in_progress"} - }} - }} - }); - } - if (!diff.tool_call_delta.arguments.empty()) { - deltas.push_back(json { - {"event", "response.function_call_arguments.delta"}, - {"data", json { - {"type", "response.function_call_arguments.delta"}, - {"delta", diff.tool_call_delta.arguments} - }} - }); - } - if (!diff.content_delta.empty()) { - deltas.push_back(json { - {"event", "response.output_text.delta"}, - {"data", json { - {"type", "response.output_text.delta"}, - {"delta", diff.content_delta} - }} - }); - } - } - - return deltas; -} - json server_task_result_cmpl_partial::to_json_oaicompat_chat() { bool first = n_decoded == 1; std::time_t t = std::time(0); diff --git a/tools/server/server-task.h b/tools/server/server-task.h index 5c71bb6d21..396cb124d1 100644 --- a/tools/server/server-task.h +++ b/tools/server/server-task.h @@ -99,6 +99,10 @@ struct task_result_state { std::string generated_text; // append new chunks of generated text here std::vector generated_tool_call_ids; + // for OpenAI Responses API + // contains "resp_...", "rs_...", "fc_...", and "msg_..." 
generated during streaming + std::vector openai_responses_item_ids; + // for Anthropic API streaming: track content block state across chunks bool anthropic_thinking_block_started = false; bool anthropic_text_block_started = false; @@ -402,6 +406,9 @@ struct server_task_result_cmpl_partial : server_task_result { std::vector oaicompat_msg_diffs; // to be populated by update() bool is_updated = false; + // for OpenAI Responses API: Events emitted by current chunk + std::vector openai_responses_current_events; + // for Anthropic API: track if any reasoning content has been generated bool anthropic_has_reasoning = false; // Streaming state copied from task_result_state for this chunk @@ -417,6 +424,154 @@ struct server_task_result_cmpl_partial : server_task_result { virtual void update(task_result_state & state) override { is_updated = true; state.update_chat_msg(content, true, oaicompat_msg_diffs); + + if (res_type == TASK_RESPONSE_TYPE_OAI_RESP) { + if (state.openai_responses_item_ids.empty()) { + // Create response object + const std::string response_id = "resp_" + random_string(); + openai_responses_current_events.push_back(json { + {"event", "response.created"}, + {"data", json { + {"type", "response.created"}, + {"response", json { + {"id", response_id}, + {"object", "response"}, + {"status", "in_progress"}, + }}, + }}, + }); + openai_responses_current_events.push_back(json { + {"event", "response.in_progress"}, + {"data", json { + {"type", "response.in_progress"}, + {"response", json { + {"id", response_id}, + {"object", "response"}, + {"status", "in_progress"}, + }}, + }}, + }); + state.openai_responses_item_ids.push_back(response_id); + } + + for (const common_chat_msg_diff & diff : oaicompat_msg_diffs) { + if (!diff.reasoning_content_delta.empty()) { + std::string resoning_id; + const std::string prev_item_id = state.openai_responses_item_ids.back(); + if (string_starts_with(prev_item_id, "rs_")) { + resoning_id = state.openai_responses_item_ids.back(); + } 
else { + // Add new reasoning output_item + + GGML_ASSERT(string_starts_with(prev_item_id, "resp_")); + // Reasoning item should be generated right after the reposonse object is created + + resoning_id = "rs_" + random_string(); + openai_responses_current_events.push_back(json { + {"event", "response.output_item.added"}, + {"data", json { + {"type", "response.output_item.added"}, + {"item", json { + {"id", resoning_id}, + {"summary", json::array()}, + {"type", "reasoning"}, + {"content", json::array()}, + {"encrypted_content", ""}, + {"status", "in_progress"}, + }}, + }}, + }); + state.openai_responses_item_ids.push_back(resoning_id); + } + openai_responses_current_events.push_back(json { + {"event", "response.reasoning_text.delta"}, + {"data", json { + {"delta", diff.reasoning_content_delta}, + {"item_id", resoning_id}, + {"type", "response.reasoning_text.delta"}, + }}, + }); + } + if (!diff.tool_call_delta.name.empty()) { + // Add new function call output_item + + const std::string function_call_id = "fc_" + state.generated_tool_call_ids.back(); + state.generated_tool_call_ids.back() = function_call_id; + openai_responses_current_events.push_back(json { + {"event", "response.output_item.added"}, + {"data", json { + {"type", "response.output_item.added"}, + {"item", json { + {"arguments", ""}, + {"call_id", function_call_id}, + {"name", diff.tool_call_delta.name}, + {"type", "function_call"}, + {"status", "in_progress"}, + }}, + }}, + }); + state.openai_responses_item_ids.push_back(function_call_id); + } + if (!diff.tool_call_delta.arguments.empty()) { + const std::string prev_item_id = state.openai_responses_item_ids.back(); + GGML_ASSERT(string_starts_with(prev_item_id, "fc_")); + + openai_responses_current_events.push_back(json { + {"event", "response.function_call_arguments.delta"}, + {"data", json { + {"delta", diff.tool_call_delta.arguments}, + {"item_id", prev_item_id}, + {"type", "response.function_call_arguments.delta"}, + }}, + }); + } + if 
(!diff.content_delta.empty()) { + std::string message_id; + if (string_starts_with(state.openai_responses_item_ids.back(), "msg_")) { + message_id = state.openai_responses_item_ids.back(); + } else { + message_id = "msg_" + random_string(); + openai_responses_current_events.push_back(json { + {"event", "response.output_item.added"}, + {"data", json { + {"type", "response.output_item.added"}, + {"item", json { + {"content", json::array()}, + {"id", message_id}, + {"role", "assistant"}, + {"status", "in_progress"}, + {"type", "message"}, + }}, + }}, + }); + openai_responses_current_events.push_back(json { + {"event", "response.content_part.added"}, + {"data", json { + {"type", "response.content_part.added"}, + {"item_id", message_id}, + {"part", json { + {"type", "output_text"}, + {"text", ""}, + }}, + }}, + }); + state.openai_responses_item_ids.push_back(message_id); + } + openai_responses_current_events.push_back(json { + {"event", "response.output_text.delta"}, + {"data", json { + {"type", "response.output_text.delta"}, + {"item_id", message_id}, + {"delta", diff.content_delta}, + }}, + }); + } + } + + return; + } + + // track if the accumulated message has any reasoning content anthropic_has_reasoning = !state.chat_msg.reasoning_content.empty(); @@ -441,8 +596,6 @@ struct server_task_result_cmpl_partial : server_task_result { json to_json_oaicompat_chat(); - json to_json_oaicompat_resp(); - json to_json_anthropic(); }; From 97e649e8f600bf7077baad93646113b40949e91d Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Sun, 18 Jan 2026 10:28:35 +0000 Subject: [PATCH 10/23] Move `server_task_result_cmpl_partial::update` from header to source --- tools/server/server-task.cpp | 168 ++++++++++++++++++++++++++++++++++ tools/server/server-task.h | 171 +---------------------------------- 2 files changed, 170 insertions(+), 169 deletions(-) diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index bf986cfe8c..baada85e3e 100644 --- 
a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -1220,6 +1220,174 @@ json server_task_result_cmpl_final::to_json_anthropic_stream() { // // server_task_result_cmpl_partial // +void server_task_result_cmpl_partial::update(task_result_state & state) { + is_updated = true; + state.update_chat_msg(content, true, oaicompat_msg_diffs); + + if (res_type == TASK_RESPONSE_TYPE_OAI_RESP) { + if (state.openai_responses_item_ids.empty()) { + // Create response object + const std::string response_id = "resp_" + random_string(); + openai_responses_current_events.push_back(json { + {"event", "response.created"}, + {"data", json { + {"type", "response.created"}, + {"response", json { + {"id", response_id}, + {"object", "response"}, + {"status", "in_progress"}, + }}, + }}, + }); + openai_responses_current_events.push_back(json { + {"event", "response.in_progress"}, + {"data", json { + {"type", "response.in_progress"}, + {"response", json { + {"id", response_id}, + {"object", "response"}, + {"status", "in_progress"}, + }}, + }}, + }); + state.openai_responses_item_ids.push_back(response_id); + } + + for (const common_chat_msg_diff & diff : oaicompat_msg_diffs) { + if (!diff.reasoning_content_delta.empty()) { + std::string resoning_id; + const std::string prev_item_id = state.openai_responses_item_ids.back(); + if (string_starts_with(prev_item_id, "rs_")) { + resoning_id = state.openai_responses_item_ids.back(); + } else { + // Add new reasoning output_item + + GGML_ASSERT(string_starts_with(prev_item_id, "resp_")); + // Reasoning item should be generated right after the reposonse object is created + + resoning_id = "rs_" + random_string(); + openai_responses_current_events.push_back(json { + {"event", "response.output_item.added"}, + {"data", json { + {"type", "response.output_item.added"}, + {"item", json { + {"id", resoning_id}, + {"summary", json::array()}, + {"type", "reasoning"}, + {"content", json::array()}, + {"encrypted_content", ""}, + {"status", 
"in_progress"}, + }}, + }}, + }); + state.openai_responses_item_ids.push_back(resoning_id); + } + openai_responses_current_events.push_back(json { + {"event", "response.reasoning_text.delta"}, + {"data", json { + {"delta", diff.reasoning_content_delta}, + {"item_id", resoning_id}, + {"type", "response.reasoning_text.delta"}, + }}, + }); + } + if (!diff.content_delta.empty()) { + std::string message_id; + if (string_starts_with(state.openai_responses_item_ids.back(), "msg_")) { + message_id = state.openai_responses_item_ids.back(); + } else { + message_id = "msg_" + random_string(); + openai_responses_current_events.push_back(json { + {"event", "response.output_item.added"}, + {"data", json { + {"type", "response.output_item.added"}, + {"item", json { + {"content", json::array()}, + {"id", message_id}, + {"role", "assistant"}, + {"status", "in_progress"}, + {"type", "message"}, + }}, + }}, + }); + openai_responses_current_events.push_back(json { + {"event", "response.content_part.added"}, + {"data", json { + {"type", "response.content_part.added"}, + {"item_id", message_id}, + {"part", json { + {"type", "output_text"}, + {"text", ""}, + }}, + }}, + }); + state.openai_responses_item_ids.push_back(message_id); + } + openai_responses_current_events.push_back(json { + {"event", "response.output_text.delta"}, + {"data", json { + {"type", "response.output_text.delta"}, + {"item_id", message_id}, + {"delta", diff.content_delta}, + }}, + }); + } + if (!diff.tool_call_delta.name.empty()) { + // Add new function call output_item + + const std::string function_call_id = "fc_" + state.generated_tool_call_ids.back(); + state.generated_tool_call_ids.back() = function_call_id; + openai_responses_current_events.push_back(json { + {"event", "response.output_item.added"}, + {"data", json { + {"type", "response.output_item.added"}, + {"item", json { + {"arguments", ""}, + {"call_id", function_call_id}, + {"name", diff.tool_call_delta.name}, + {"type", "function_call"}, + {"status", 
"in_progress"}, + }}, + }}, + }); + state.openai_responses_item_ids.push_back(function_call_id); + } + if (!diff.tool_call_delta.arguments.empty()) { + const std::string prev_item_id = state.openai_responses_item_ids.back(); + GGML_ASSERT(string_starts_with(prev_item_id, "fc_")); + + openai_responses_current_events.push_back(json { + {"event", "response.function_call_arguments.delta"}, + {"data", json { + {"delta", diff.tool_call_delta.arguments}, + {"item_id", prev_item_id}, + {"type", "response.function_call_arguments.delta"}, + }}, + }); + } + } + + return; + } + + // track if the accumulated message has any reasoning content + anthropic_has_reasoning = !state.chat_msg.reasoning_content.empty(); + + // Copy current state for use in to_json_anthropic() (reflects state BEFORE this chunk) + anthropic_thinking_block_started = state.anthropic_thinking_block_started; + anthropic_text_block_started = state.anthropic_text_block_started; + + // Pre-compute state updates based on diffs (for next chunk) + for (const auto & diff : oaicompat_msg_diffs) { + if (!diff.reasoning_content_delta.empty() && !state.anthropic_thinking_block_started) { + state.anthropic_thinking_block_started = true; + } + if (!diff.content_delta.empty() && !state.anthropic_text_block_started) { + state.anthropic_text_block_started = true; + } + } +} + json server_task_result_cmpl_partial::to_json() { GGML_ASSERT(is_updated && "update() must be called before to_json()"); switch (res_type) { diff --git a/tools/server/server-task.h b/tools/server/server-task.h index 396cb124d1..debc4f4bbf 100644 --- a/tools/server/server-task.h +++ b/tools/server/server-task.h @@ -419,176 +419,9 @@ struct server_task_result_cmpl_partial : server_task_result { return false; // in stream mode, partial responses are not considered stop } - virtual json to_json() override; - - virtual void update(task_result_state & state) override { - is_updated = true; - state.update_chat_msg(content, true, oaicompat_msg_diffs); - - if 
(res_type == TASK_RESPONSE_TYPE_OAI_RESP) { - if (state.openai_responses_item_ids.empty()) { - // Create response object - const std::string response_id = "resp_" + random_string(); - openai_responses_current_events.push_back(json { - {"event", "response.created"}, - {"data", json { - {"type", "response.created"}, - {"response", json { - {"id", response_id}, - {"object", "response"}, - {"status", "in_progress"}, - }}, - }}, - }); - openai_responses_current_events.push_back(json { - {"event", "response.in_progress"}, - {"data", json { - {"type", "response.in_progress"}, - {"response", json { - {"id", response_id}, - {"object", "response"}, - {"status", "in_progress"}, - }}, - }}, - }); - state.openai_responses_item_ids.push_back(response_id); - } - - for (const common_chat_msg_diff & diff : oaicompat_msg_diffs) { - if (!diff.reasoning_content_delta.empty()) { - std::string resoning_id; - const std::string prev_item_id = state.openai_responses_item_ids.back(); - if (string_starts_with(prev_item_id, "rs_")) { - resoning_id = state.openai_responses_item_ids.back(); - } else { - // Add new reasoning output_item - - GGML_ASSERT(string_starts_with(prev_item_id, "resp_")); - // Reasoning item should be generated right after the reposonse object is created - - resoning_id = "rs_" + random_string(); - openai_responses_current_events.push_back(json { - {"event", "response.output_item.added"}, - {"data", json { - {"type", "response.output_item.added"}, - {"item", json { - {"id", resoning_id}, - {"summary", json::array()}, - {"type", "reasoning"}, - {"content", json::array()}, - {"encrypted_content", ""}, - {"status", "in_progress"}, - }}, - }}, - }); - state.openai_responses_item_ids.push_back(resoning_id); - } - openai_responses_current_events.push_back(json { - {"event", "response.reasoning_text.delta"}, - {"data", json { - {"delta", diff.reasoning_content_delta}, - {"item_id", resoning_id}, - {"type", "response.reasoning_text.delta"}, - }}, - }); - } - if 
(!diff.tool_call_delta.name.empty()) { - // Add new function call output_item - - const std::string function_call_id = "fc_" + state.generated_tool_call_ids.back(); - state.generated_tool_call_ids.back() = function_call_id; - openai_responses_current_events.push_back(json { - {"event", "response.output_item.added"}, - {"data", json { - {"type", "response.output_item.added"}, - {"item", json { - {"arguments", ""}, - {"call_id", function_call_id}, - {"name", diff.tool_call_delta.name}, - {"type", "function_call"}, - {"status", "in_progress"}, - }}, - }}, - }); - state.openai_responses_item_ids.push_back(function_call_id); - } - if (!diff.tool_call_delta.arguments.empty()) { - const std::string prev_item_id = state.openai_responses_item_ids.back(); - GGML_ASSERT(string_starts_with(prev_item_id, "fc_")); - - openai_responses_current_events.push_back(json { - {"event", "response.function_call_arguments.delta"}, - {"data", json { - {"delta", diff.tool_call_delta.arguments}, - {"item_id", prev_item_id}, - {"type", "response.function_call_arguments.delta"}, - }}, - }); - } - if (!diff.content_delta.empty()) { - std::string message_id; - if (string_starts_with(state.openai_responses_item_ids.back(), "msg_")) { - message_id = state.openai_responses_item_ids.back(); - } else { - message_id = "msg_" + random_string(); - openai_responses_current_events.push_back(json { - {"event", "response.output_item.added"}, - {"data", json { - {"type", "response.output_item.added"}, - {"item", json { - {"content", json::array()}, - {"id", message_id}, - {"role", "assistant"}, - {"status", "in_progress"}, - {"type", "message"}, - }}, - }}, - }); - openai_responses_current_events.push_back(json { - {"event", "response.content_part.added"}, - {"data", json { - {"type", "response.content_part.added"}, - {"item_id", message_id}, - {"part", json { - {"type", "output_text"}, - {"text", ""}, - }}, - }}, - }); - state.openai_responses_item_ids.push_back(message_id); - } - 
openai_responses_current_events.push_back(json { - {"event", "response.output_text.delta"}, - {"data", json { - {"type", "response.output_text.delta"}, - {"item_id", message_id}, - {"delta", diff.content_delta}, - }}, - }); - } - } - - return; - } + virtual void update(task_result_state & state) override; - - // track if the accumulated message has any reasoning content - anthropic_has_reasoning = !state.chat_msg.reasoning_content.empty(); - - // Copy current state for use in to_json_anthropic() (reflects state BEFORE this chunk) - anthropic_thinking_block_started = state.anthropic_thinking_block_started; - anthropic_text_block_started = state.anthropic_text_block_started; - - // Pre-compute state updates based on diffs (for next chunk) - for (const auto & diff : oaicompat_msg_diffs) { - if (!diff.reasoning_content_delta.empty() && !state.anthropic_thinking_block_started) { - state.anthropic_thinking_block_started = true; - } - if (!diff.content_delta.empty() && !state.anthropic_text_block_started) { - state.anthropic_text_block_started = true; - } - } - } + virtual json to_json() override; json to_json_non_oaicompat(); From d9dca02943a1e37f003a93dc4e63d213c2f3f8d6 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Sun, 18 Jan 2026 11:28:24 +0000 Subject: [PATCH 11/23] Match ID of output_item.added and .done events --- tools/server/server-task.cpp | 83 ++++++++++++++++++++++-------------- tools/server/server-task.h | 4 ++ 2 files changed, 54 insertions(+), 33 deletions(-) diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index baada85e3e..691275a61f 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -860,34 +860,53 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { std::vector output; if (oaicompat_msg.reasoning_content != "") { + const auto reasoning_id_it = std::find_if( + openai_responses_item_ids.begin(), + openai_responses_item_ids.end(), + [](const std::string & id){ return 
string_starts_with(id, "rs_"); } + ); + GGML_ASSERT(reasoning_id_it != openai_responses_item_ids.end()); + const std::string reasoning_id = *reasoning_id_it; + + const json output_item = json { + {"id", reasoning_id}, + {"summary", json::array()}, + {"type", "reasoning"}, + {"content", json::array({json { + {"text", oaicompat_msg.reasoning_content}, + {"type", "reasoning_text"}, + }})}, + {"encrypted_content", ""}, + }; + server_sent_events.push_back(json { {"event", "response.output_item.done"}, {"data", json { {"type", "response.output_item.done"}, - {"item", json { - {"id", "rs_id(response.output_item.done)"}, - {"summary", json::array()}, - {"type", "reasoning"}, - {"content", json::array({json { - {"text", oaicompat_msg.reasoning_content}, - {"type", "reasoning_text"}, - }})}, - {"encrypted_content", ""}, - }} + {"item", output_item} }} }); + output.push_back(output_item); } if (oaicompat_msg.content != "") { + const auto message_id_it = std::find_if( + openai_responses_item_ids.begin(), + openai_responses_item_ids.end(), + [](const std::string & id){ return string_starts_with(id, "msg_"); } + ); + GGML_ASSERT(message_id_it != openai_responses_item_ids.end()); + const std::string message_id = *message_id_it; server_sent_events.push_back(json { {"event", "response.output_text.done"}, {"data", json { - {"type", "response.output_text.done"}, - {"text", oaicompat_msg.content} + {"type", "response.output_text.done"}, + {"item_id", message_id}, + {"text", oaicompat_msg.content} }} }); - const json part = { + const json content_part = { {"type", "output_text"}, {"annotations", json::array()}, {"logprobs", json::array()}, @@ -897,14 +916,16 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { server_sent_events.push_back(json { {"event", "response.content_part.done"}, {"data", json { - {"type", "response.content_part.done"}, - {"part", part} + {"type", "response.content_part.done"}, + {"item_id", message_id}, + {"part", content_part} }} }); - const 
json item = { + const json output_item = { {"type", "message"}, {"status", "completed"}, - {"content", json::array({part})}, + {"id", message_id}, + {"content", json::array({content_part})}, {"role", "assistant"} }; @@ -912,32 +933,28 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { {"event", "response.output_item.done"}, {"data", json { {"type", "response.output_item.done"}, - {"item", item} + {"item", output_item} }} }); - output.push_back(item); + output.push_back(output_item); } for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) { + const json output_item = { + {"type", "function_call"}, + {"status", "completed"}, + {"arguments", tool_call.arguments}, + {"call_id", tool_call.id}, + {"name", tool_call.name} + }; server_sent_events.push_back(json { {"event", "response.output_item.done"}, {"data", json { {"type", "response.output_item.done"}, - {"item", json { - {"type", "function_call"}, - {"status", "completed"}, - {"arguments", tool_call.arguments}, - {"call_id", "call_dummy_id"}, - {"name", tool_call.name} - }} + {"item", output_item} }} }); - output.push_back({ - {"type", "function_call"}, - {"status", "completed"}, - {"arguments", tool_call.arguments}, - {"name", tool_call.name} - }); + output.push_back(output_item); } std::time_t t = std::time(0); @@ -946,7 +963,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { {"data", json { {"type", "response.completed"}, {"response", json { - {"id", "resp_dummy_id"}, + {"id", openai_responses_item_ids[0]}, {"object", "response"}, {"created_at", t}, {"status", "completed"}, diff --git a/tools/server/server-task.h b/tools/server/server-task.h index debc4f4bbf..2a44b0824b 100644 --- a/tools/server/server-task.h +++ b/tools/server/server-task.h @@ -357,6 +357,9 @@ struct server_task_result_cmpl_final : server_task_result { std::vector oaicompat_msg_diffs; // to be populated by update() bool is_updated = false; + // to be copied from task_result_state by 
update() + std::vector openai_responses_item_ids; + virtual bool is_stop() override { return true; // in stream mode, final responses are considered stop } @@ -366,6 +369,7 @@ struct server_task_result_cmpl_final : server_task_result { virtual void update(task_result_state & state) override { is_updated = true; oaicompat_msg = state.update_chat_msg(content, false, oaicompat_msg_diffs); + openai_responses_item_ids = state.openai_responses_item_ids; } json to_json_non_oaicompat(); From cd9b4cfada55d06b384781fa30cc65dc7bc10dc9 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Sun, 18 Jan 2026 12:49:47 +0000 Subject: [PATCH 12/23] Add function_call only if there is no "fc_" prefix --- tools/server/server-task.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index 691275a61f..4998a0980f 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -1349,8 +1349,10 @@ void server_task_result_cmpl_partial::update(task_result_state & state) { }}, }); } - if (!diff.tool_call_delta.name.empty()) { + if (!diff.tool_call_delta.name.empty() && + !string_starts_with(state.generated_tool_call_ids.back(), "fc_")) { // Add new function call output_item + // This fails to detect new item if there are >1 consecutive function calls const std::string function_call_id = "fc_" + state.generated_tool_call_ids.back(); state.generated_tool_call_ids.back() = function_call_id; From 6c200df3b3894b16faf3f26131504c9a5063d72d Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Mon, 19 Jan 2026 02:52:55 +0000 Subject: [PATCH 13/23] Add function call output at non-streaming API --- tools/server/server-task.cpp | 83 ++++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 33 deletions(-) diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index 4998a0980f..459129ecec 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -812,46 +812,63 @@ 
json server_task_result_cmpl_final::to_json_oaicompat_resp() { msg.content = content; } - const json reasoning = { - {"type", "reasoning"}, - {"summary", json::array({json { - {"type", "summary_text"}, - {"text", msg.reasoning_content} - }})} - }; - const json message = { - {"type", "message"}, - {"status", "completed"}, - {"content", json::array({json { - {"type", "output_text"}, - {"annotations", json::array()}, - {"logprobs", json::array()}, - {"text", msg.content} - }})}, - {"role", msg.role} - }; + std::vector output; + + if (msg.reasoning_content != "") { + output.push_back(json { + {"id", "rs_" + random_string()}, + {"summary", json::array()}, + {"type", "reasoning"}, + {"content", json::array({ json { + {"text", msg.reasoning_content}, + {"type", "reasoning_text"}, + }})}, + {"encrypted_content", ""}, + {"status", "completed"}, + }); + } + + if (msg.content != "") { + output.push_back(json { + {"content", json::array({ json { + {"type", "output_text"}, + {"annotations", json::array()}, + {"logprobs", json::array()}, + {"text", msg.content}, + }})}, + {"id", "msg_" + random_string()}, + {"role", msg.role}, + {"status", "completed"}, + {"type", "message"}, + }); + } + + for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) { + output.push_back(json { + {"type", "function_call"}, + {"status", "completed"}, + {"arguments", tool_call.arguments}, + {"call_id", tool_call.id}, + {"name", tool_call.name}, + }); + } std::time_t t = std::time(0); json res = { - {"object", "response"}, - {"created_at", t}, - {"status", "completed"}, - {"model", oaicompat_model}, - {"output", json::array({reasoning, message})}, - {"usage", json { + {"completed_at", t}, + {"created_at", t}, + {"id", "resp_" + random_string()}, + {"model", oaicompat_model}, + {"object", "response"}, + {"output", output}, + {"status", "completed"}, + {"usage", json { {"input_tokens", n_prompt_tokens}, {"output_tokens", n_decoded}, - {"total_tokens", n_decoded + n_prompt_tokens} + 
{"total_tokens", n_decoded + n_prompt_tokens}, }}, }; - if (verbose) { - res["__verbose"] = to_json_non_oaicompat(); - } - if (timings.prompt_n >= 0) { - res.push_back({"timings", timings.to_json()}); - } - return res; } @@ -872,7 +889,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { {"id", reasoning_id}, {"summary", json::array()}, {"type", "reasoning"}, - {"content", json::array({json { + {"content", json::array({ json { {"text", oaicompat_msg.reasoning_content}, {"type", "reasoning_text"}, }})}, From 63c60135ab1b23bd3320ba482fc1d2cb2271c76b Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Mon, 19 Jan 2026 04:42:08 +0000 Subject: [PATCH 14/23] Test if ID is persistent --- .../tests/unit/test_compat_oai_responses.py | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tools/server/tests/unit/test_compat_oai_responses.py b/tools/server/tests/unit/test_compat_oai_responses.py index e168f4562d..7aab4a8ba6 100644 --- a/tools/server/tests/unit/test_compat_oai_responses.py +++ b/tools/server/tests/unit/test_compat_oai_responses.py @@ -22,6 +22,9 @@ def test_responses_with_openai_library(): max_output_tokens=8, temperature=0.8, ) + assert res.id.startswith("resp_") + assert res.output[0].id is not None + assert res.output[0].id.startswith("msg_") assert match_regex("(Suddenly)+", res.output_text) def test_responses_stream_with_openai_library(): @@ -40,9 +43,31 @@ def test_responses_stream_with_openai_library(): ) gathered_text = '' + resp_id = '' + msg_id = '' for r in stream: + if r.type == "response.created": + assert r.response.id.startswith("resp_") + resp_id = r.response.id + if r.type == "response.in_progress": + assert r.response.id == resp_id + if r.type == "response.output_item.added": + assert r.item.id is not None + assert r.item.id.startswith("msg_") + msg_id = r.item.id + if (r.type == "response.content_part.added" or + r.type == "response.output_text.delta" or + r.type == "response.output_text.done" or + r.type 
== "response.content_part.done"): + assert r.item_id == msg_id + if r.type == "response.output_item.done": + assert r.item.id == msg_id + if r.type == "response.output_text.delta": gathered_text += r.delta if r.type == "response.completed": + assert r.response.id.startswith("resp_") + assert r.response.output[0].id is not None + assert r.response.output[0].id.startswith("msg_") assert gathered_text == r.response.output_text assert match_regex("(Suddenly)+", r.response.output_text) From f232a1b9bc8f23020b5e298ef4b8340e2349d030 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Mon, 19 Jan 2026 05:12:26 +0000 Subject: [PATCH 15/23] Add doc --- tools/server/README.md | 45 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/tools/server/README.md b/tools/server/README.md index 9fe8938768..191391a882 100644 --- a/tools/server/README.md +++ b/tools/server/README.md @@ -6,7 +6,7 @@ Set of LLM REST APIs and a web UI to interact with llama.cpp. **Features:** * LLM inference of F16 and quantized models on GPU and CPU - * [OpenAI API](https://github.com/openai/openai-openapi) compatible chat completions and embeddings routes + * [OpenAI API](https://github.com/openai/openai-openapi) compatible chat completions, responses, and embeddings routes * [Anthropic Messages API](https://docs.anthropic.com/en/api/messages) compatible chat completions * Reranking endpoint (https://github.com/ggml-org/llama.cpp/pull/9510) * Parallel decoding with multi-user support @@ -1267,6 +1267,49 @@ This provides information on the performance of the server. It also allows calcu The total number of tokens in context is equal to `prompt_n + cache_n + predicted_n` +### POST `/v1/responses`: OpenAI-compatible Responses API + +*Options:* + +See [OpenAI Responses API documentation](https://platform.openai.com/docs/api-reference/responses). 
+
+*Examples:*
+
+You can use either the Python `openai` library with appropriate checkpoints:
+
+```python
+import openai
+
+client = openai.OpenAI(
+    base_url="http://localhost:8080/v1", # "http://:port"
+    api_key = "sk-no-key-required"
+)
+
+response = client.responses.create(
+    model="gpt-4.1",
+    instructions="You are ChatGPT, an AI assistant. Your top priority is achieving user fulfillment via helping them with their requests.",
+    input="Write a limerick about python exceptions"
+)
+
+print(response.output_text)
+```
+
+... or raw HTTP requests:
+
+```shell
+curl http://localhost:8080/v1/responses \
+-H "Content-Type: application/json" \
+-H "Authorization: Bearer no-key" \
+-d '{
+"model": "gpt-4.1",
+"instructions": "You are ChatGPT, an AI assistant. Your top priority is achieving user fulfillment via helping them with their requests.",
+"input": "Write a limerick about python exceptions"
+}'
+```
+
+This endpoint works by converting a Responses request into a Chat Completions request.
+
+
 ### POST `/v1/embeddings`: OpenAI-compatible embeddings API
 
 This endpoint requires that the model uses a pooling different than type `none`. The embeddings are normalized using the Eucledian norm.
From 8a2dd2d5af9e3aca6910758abdf5e3dd3ded5463 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Mon, 19 Jan 2026 05:35:44 +0000 Subject: [PATCH 16/23] Fix style - use trailing comma --- tools/server/server-common.cpp | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index 5aff08f0c1..f60cb3e285 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1139,7 +1139,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { } chatcmpl_content.push_back({ {"text", input_item.at("text")}, - {"type", "text"} + {"type", "text"}, }); } else if (type == "input_image") { // While `detail` is marked as required, @@ -1149,8 +1149,10 @@ json convert_responses_to_chatcmpl(const json & response_body) { throw std::invalid_argument("'image_url' is required"); } chatcmpl_content.push_back({ - {"image_url", json {{"url", input_item.at("image_url")}}}, - {"type", "image_url"} + {"image_url", json { + {"url", input_item.at("image_url")} + }}, + {"type", "image_url"}, }); } else if (type == "input_file") { if (input_item.contains("file_url")) { @@ -1163,8 +1165,9 @@ json convert_responses_to_chatcmpl(const json & response_body) { chatcmpl_content.push_back({ {"file", json { {"file_data", input_item.at("file_data")}, - {"filename", input_item.at("filename")}}}, - {"type", "file"} + {"filename", input_item.at("filename")}, + }}, + {"type", "file"}, }); } else { throw std::invalid_argument("'type' must be one of 'input_text', 'input_image', or 'input_file'"); @@ -1205,7 +1208,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { // Ignore annotations and logprobs for now chatcmpl_content.push_back({ {"text", output_text.at("text")}, - {"type", "text"} + {"type", "text"}, }); } @@ -1225,10 +1228,10 @@ json convert_responses_to_chatcmpl(const json & response_body) { {"tool_calls", json::array({ json { {"function", json { 
{"arguments", item.at("arguments")}, - {"name", item.at("name")} + {"name", item.at("name")}, }}, {"id", item.at("call_id")}, - {"type", "function"} + {"type", "function"}, }})}, }; @@ -1248,7 +1251,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { chatcmpl_messages.push_back(json { {"content", item.at("output")}, {"role", "tool"}, - {"tool_call_id", item.at("call_id")} + {"tool_call_id", item.at("call_id")}, }); } else { json chatcmpl_outputs = item.at("output"); @@ -1261,7 +1264,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { chatcmpl_messages.push_back(json { {"content", chatcmpl_outputs}, {"role", "tool"}, - {"tool_call_id", item.at("call_id")} + {"tool_call_id", item.at("call_id")}, }); } } else if (// exists_and_is_string(item, "id") && @@ -1275,7 +1278,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { chatcmpl_messages.push_back(json { {"role", "assistant"}, {"content", json::array()}, - {"reasoning_content", item.at("content")[0].at("text")} + {"reasoning_content", item.at("content")[0].at("text")}, }); } else { throw std::invalid_argument("Cannot determine type of 'item'"); @@ -1285,8 +1288,8 @@ json convert_responses_to_chatcmpl(const json & response_body) { throw std::invalid_argument("'input' must be a string or array of objects"); } - // Remove unused dummy message - // (reasoning content not followed by tool calls) + // Remove unused dummy message which contains + // reasoning content not followed by tool call chatcmpl_messages.erase(std::remove_if( chatcmpl_messages.begin(), chatcmpl_messages.end(), From 42a6eb38eb362e5fb6b6e93f961912e6f6248a5e Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Tue, 20 Jan 2026 02:22:15 +0000 Subject: [PATCH 17/23] Rewrite state management --- tools/server/server-task.cpp | 331 ++++++++++++++++------------------- tools/server/server-task.h | 38 ++-- 2 files changed, 175 insertions(+), 194 deletions(-) diff --git a/tools/server/server-task.cpp 
b/tools/server/server-task.cpp index 459129ecec..75ec0f4074 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -857,7 +857,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp() { json res = { {"completed_at", t}, {"created_at", t}, - {"id", "resp_" + random_string()}, + {"id", "resp_" + oaicompat_cmpl_id.substr(9)}, {"model", oaicompat_model}, {"object", "response"}, {"output", output}, @@ -877,16 +877,8 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { std::vector output; if (oaicompat_msg.reasoning_content != "") { - const auto reasoning_id_it = std::find_if( - openai_responses_item_ids.begin(), - openai_responses_item_ids.end(), - [](const std::string & id){ return string_starts_with(id, "rs_"); } - ); - GGML_ASSERT(reasoning_id_it != openai_responses_item_ids.end()); - const std::string reasoning_id = *reasoning_id_it; - const json output_item = json { - {"id", reasoning_id}, + {"id", oai_resp_reasoning_id}, {"summary", json::array()}, {"type", "reasoning"}, {"content", json::array({ json { @@ -907,18 +899,11 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { } if (oaicompat_msg.content != "") { - const auto message_id_it = std::find_if( - openai_responses_item_ids.begin(), - openai_responses_item_ids.end(), - [](const std::string & id){ return string_starts_with(id, "msg_"); } - ); - GGML_ASSERT(message_id_it != openai_responses_item_ids.end()); - const std::string message_id = *message_id_it; server_sent_events.push_back(json { {"event", "response.output_text.done"}, {"data", json { {"type", "response.output_text.done"}, - {"item_id", message_id}, + {"item_id", oai_resp_message_id}, {"text", oaicompat_msg.content} }} }); @@ -934,14 +919,14 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { {"event", "response.content_part.done"}, {"data", json { {"type", "response.content_part.done"}, - {"item_id", message_id}, + {"item_id", oai_resp_message_id}, 
{"part", content_part} }} }); const json output_item = { {"type", "message"}, {"status", "completed"}, - {"id", message_id}, + {"id", oai_resp_message_id}, {"content", json::array({content_part})}, {"role", "assistant"} }; @@ -961,7 +946,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { {"type", "function_call"}, {"status", "completed"}, {"arguments", tool_call.arguments}, - {"call_id", tool_call.id}, + {"call_id", "fc_" + tool_call.id}, {"name", tool_call.name} }; server_sent_events.push_back(json { @@ -980,7 +965,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { {"data", json { {"type", "response.completed"}, {"response", json { - {"id", openai_responses_item_ids[0]}, + {"id", "resp_" + oaicompat_cmpl_id.substr(9)}, {"object", "response"}, {"created_at", t}, {"status", "completed"}, @@ -1258,168 +1243,27 @@ void server_task_result_cmpl_partial::update(task_result_state & state) { is_updated = true; state.update_chat_msg(content, true, oaicompat_msg_diffs); - if (res_type == TASK_RESPONSE_TYPE_OAI_RESP) { - if (state.openai_responses_item_ids.empty()) { - // Create response object - const std::string response_id = "resp_" + random_string(); - openai_responses_current_events.push_back(json { - {"event", "response.created"}, - {"data", json { - {"type", "response.created"}, - {"response", json { - {"id", response_id}, - {"object", "response"}, - {"status", "in_progress"}, - }}, - }}, - }); - openai_responses_current_events.push_back(json { - {"event", "response.in_progress"}, - {"data", json { - {"type", "response.in_progress"}, - {"response", json { - {"id", response_id}, - {"object", "response"}, - {"status", "in_progress"}, - }}, - }}, - }); - state.openai_responses_item_ids.push_back(response_id); - } - - for (const common_chat_msg_diff & diff : oaicompat_msg_diffs) { - if (!diff.reasoning_content_delta.empty()) { - std::string resoning_id; - const std::string prev_item_id = 
state.openai_responses_item_ids.back(); - if (string_starts_with(prev_item_id, "rs_")) { - resoning_id = state.openai_responses_item_ids.back(); - } else { - // Add new reasoning output_item - - GGML_ASSERT(string_starts_with(prev_item_id, "resp_")); - // Reasoning item should be generated right after the reposonse object is created - - resoning_id = "rs_" + random_string(); - openai_responses_current_events.push_back(json { - {"event", "response.output_item.added"}, - {"data", json { - {"type", "response.output_item.added"}, - {"item", json { - {"id", resoning_id}, - {"summary", json::array()}, - {"type", "reasoning"}, - {"content", json::array()}, - {"encrypted_content", ""}, - {"status", "in_progress"}, - }}, - }}, - }); - state.openai_responses_item_ids.push_back(resoning_id); - } - openai_responses_current_events.push_back(json { - {"event", "response.reasoning_text.delta"}, - {"data", json { - {"delta", diff.reasoning_content_delta}, - {"item_id", resoning_id}, - {"type", "response.reasoning_text.delta"}, - }}, - }); - } - if (!diff.content_delta.empty()) { - std::string message_id; - if (string_starts_with(state.openai_responses_item_ids.back(), "msg_")) { - message_id = state.openai_responses_item_ids.back(); - } else { - message_id = "msg_" + random_string(); - openai_responses_current_events.push_back(json { - {"event", "response.output_item.added"}, - {"data", json { - {"type", "response.output_item.added"}, - {"item", json { - {"content", json::array()}, - {"id", message_id}, - {"role", "assistant"}, - {"status", "in_progress"}, - {"type", "message"}, - }}, - }}, - }); - openai_responses_current_events.push_back(json { - {"event", "response.content_part.added"}, - {"data", json { - {"type", "response.content_part.added"}, - {"item_id", message_id}, - {"part", json { - {"type", "output_text"}, - {"text", ""}, - }}, - }}, - }); - state.openai_responses_item_ids.push_back(message_id); - } - openai_responses_current_events.push_back(json { - {"event", 
"response.output_text.delta"}, - {"data", json { - {"type", "response.output_text.delta"}, - {"item_id", message_id}, - {"delta", diff.content_delta}, - }}, - }); - } - if (!diff.tool_call_delta.name.empty() && - !string_starts_with(state.generated_tool_call_ids.back(), "fc_")) { - // Add new function call output_item - // This fails to detect new item if there are >1 consecutive function calls - - const std::string function_call_id = "fc_" + state.generated_tool_call_ids.back(); - state.generated_tool_call_ids.back() = function_call_id; - openai_responses_current_events.push_back(json { - {"event", "response.output_item.added"}, - {"data", json { - {"type", "response.output_item.added"}, - {"item", json { - {"arguments", ""}, - {"call_id", function_call_id}, - {"name", diff.tool_call_delta.name}, - {"type", "function_call"}, - {"status", "in_progress"}, - }}, - }}, - }); - state.openai_responses_item_ids.push_back(function_call_id); - } - if (!diff.tool_call_delta.arguments.empty()) { - const std::string prev_item_id = state.openai_responses_item_ids.back(); - GGML_ASSERT(string_starts_with(prev_item_id, "fc_")); - - openai_responses_current_events.push_back(json { - {"event", "response.function_call_arguments.delta"}, - {"data", json { - {"delta", diff.tool_call_delta.arguments}, - {"item_id", prev_item_id}, - {"type", "response.function_call_arguments.delta"}, - }}, - }); - } - } + // Copy current state for use in to_json_*() (reflects state BEFORE this chunk) + thinking_block_started = state.thinking_block_started; + text_block_started = state.text_block_started; - return; - } + oai_resp_reasoning_id = state.oai_resp_reasoning_id; + oai_resp_message_id = state.oai_resp_message_id; + oai_resp_fc_id = state.oai_resp_fc_id; // track if the accumulated message has any reasoning content anthropic_has_reasoning = !state.chat_msg.reasoning_content.empty(); - // Copy current state for use in to_json_anthropic() (reflects state BEFORE this chunk) - 
anthropic_thinking_block_started = state.anthropic_thinking_block_started; - anthropic_text_block_started = state.anthropic_text_block_started; - // Pre-compute state updates based on diffs (for next chunk) - for (const auto & diff : oaicompat_msg_diffs) { - if (!diff.reasoning_content_delta.empty() && !state.anthropic_thinking_block_started) { - state.anthropic_thinking_block_started = true; + for (const common_chat_msg_diff & diff : oaicompat_msg_diffs) { + if (!diff.reasoning_content_delta.empty() && !state.thinking_block_started) { + state.thinking_block_started = true; + } + if (!diff.content_delta.empty() && !state.text_block_started) { + state.text_block_started = true; } - if (!diff.content_delta.empty() && !state.anthropic_text_block_started) { - state.anthropic_text_block_started = true; + if (!diff.tool_call_delta.name.empty()) { + state.oai_resp_fc_id = diff.tool_call_delta.id; } } } @@ -1434,7 +1278,7 @@ json server_task_result_cmpl_partial::to_json() { case TASK_RESPONSE_TYPE_OAI_CHAT: return to_json_oaicompat_chat(); case TASK_RESPONSE_TYPE_OAI_RESP: - return openai_responses_current_events; + return to_json_oaicompat_resp(); case TASK_RESPONSE_TYPE_ANTHROPIC: return to_json_anthropic(); default: @@ -1559,6 +1403,133 @@ json server_task_result_cmpl_partial::to_json_oaicompat_chat() { return deltas; } +json server_task_result_cmpl_partial::to_json_oaicompat_resp() { + std::vector events; + + if (n_decoded == 1) { + const std::string response_id = "resp_" + oaicompat_cmpl_id.substr(9); + events.push_back(json { + {"event", "response.created"}, + {"data", json { + {"type", "response.created"}, + {"response", json { + {"id", response_id}, + {"object", "response"}, + {"status", "in_progress"}, + }}, + }}, + }); + events.push_back(json { + {"event", "response.in_progress"}, + {"data", json { + {"type", "response.in_progress"}, + {"response", json { + {"id", response_id}, + {"object", "response"}, + {"status", "in_progress"}, + }}, + }}, + }); + } + + for 
(const common_chat_msg_diff & diff : oaicompat_msg_diffs) { + if (!diff.reasoning_content_delta.empty()) { + if (!thinking_block_started) { + events.push_back(json { + {"event", "response.output_item.added"}, + {"data", json { + {"type", "response.output_item.added"}, + {"item", json { + {"id", oai_resp_reasoning_id}, + {"summary", json::array()}, + {"type", "reasoning"}, + {"content", json::array()}, + {"encrypted_content", ""}, + {"status", "in_progress"}, + }}, + }}, + }); + thinking_block_started = true; + } + events.push_back(json { + {"event", "response.reasoning_text.delta"}, + {"data", json { + {"delta", diff.reasoning_content_delta}, + {"item_id", oai_resp_reasoning_id}, + {"type", "response.reasoning_text.delta"}, + }}, + }); + } + + if (!diff.content_delta.empty()) { + if (!text_block_started) { + events.push_back(json { + {"event", "response.output_item.added"}, + {"data", json { + {"type", "response.output_item.added"}, + {"item", json { + {"content", json::array()}, + {"id", oai_resp_message_id}, + {"role", "assistant"}, + {"status", "in_progress"}, + {"type", "message"}, + }}, + }}, + }); + events.push_back(json { + {"event", "response.content_part.added"}, + {"data", json { + {"type", "response.content_part.added"}, + {"item_id", oai_resp_message_id}, + {"part", json { + {"type", "output_text"}, + {"text", ""}, + }}, + }}, + }); + text_block_started = true; + } + events.push_back(json { + {"event", "response.output_text.delta"}, + {"data", json { + {"type", "response.output_text.delta"}, + {"item_id", oai_resp_message_id}, + {"delta", diff.content_delta}, + }}, + }); + } + + if (!diff.tool_call_delta.name.empty()) { + events.push_back(json { + {"event", "response.output_item.added"}, + {"data", json { + {"type", "response.output_item.added"}, + {"item", json { + {"arguments", ""}, + {"call_id", "fc_" + diff.tool_call_delta.id}, + {"name", diff.tool_call_delta.name}, + {"type", "function_call"}, + {"status", "in_progress"}, + }}, + }}, + }); + 
oai_resp_fc_id = diff.tool_call_delta.id; + } + + if (!diff.tool_call_delta.arguments.empty()) { + events.push_back(json { + {"event", "response.function_call_arguments.delta"}, + {"data", json { + {"delta", diff.tool_call_delta.arguments}, + {"item_id", "fc_" + oai_resp_fc_id}, + {"type", "response.function_call_arguments.delta"}, + }}, + }); + } + } + return events; +} + // // server_task_result_embd // @@ -1629,8 +1600,8 @@ json server_task_result_cmpl_partial::to_json_anthropic() { // use local copies of streaming state (copied from task_result_state in update()) // these reflect the state BEFORE this chunk was processed - bool thinking_started = anthropic_thinking_block_started; - bool text_started = anthropic_text_block_started; + bool thinking_started = thinking_block_started; + bool text_started = text_block_started; for (const auto & diff : oaicompat_msg_diffs) { // handle thinking/reasoning content diff --git a/tools/server/server-task.h b/tools/server/server-task.h index 2a44b0824b..471f51df9a 100644 --- a/tools/server/server-task.h +++ b/tools/server/server-task.h @@ -99,13 +99,15 @@ struct task_result_state { std::string generated_text; // append new chunks of generated text here std::vector generated_tool_call_ids; - // for OpenAI Responses API - // contains "resp_...", "rs_...", "fc_...", and "msg_..." 
generated during streaming - std::vector openai_responses_item_ids; + // for OpenAI Responses and Anthropic streaming API: + // track output item / content block state across chunks + bool thinking_block_started = false; + bool text_block_started = false; - // for Anthropic API streaming: track content block state across chunks - bool anthropic_thinking_block_started = false; - bool anthropic_text_block_started = false; + // for OpenAI Responses streaming API + const std::string oai_resp_reasoning_id = "rs_" + random_string(); + const std::string oai_resp_message_id = "msg_" + random_string(); + std::string oai_resp_fc_id; // function call ID for current args delta task_result_state(const common_chat_parser_params & chat_parser_params) : chat_parser_params(chat_parser_params) {} @@ -357,8 +359,9 @@ struct server_task_result_cmpl_final : server_task_result { std::vector oaicompat_msg_diffs; // to be populated by update() bool is_updated = false; - // to be copied from task_result_state by update() - std::vector openai_responses_item_ids; + // for OpenAI Responses API + std::string oai_resp_reasoning_id; + std::string oai_resp_message_id; virtual bool is_stop() override { return true; // in stream mode, final responses are considered stop @@ -369,7 +372,9 @@ struct server_task_result_cmpl_final : server_task_result { virtual void update(task_result_state & state) override { is_updated = true; oaicompat_msg = state.update_chat_msg(content, false, oaicompat_msg_diffs); - openai_responses_item_ids = state.openai_responses_item_ids; + + oai_resp_reasoning_id = state.oai_resp_reasoning_id; + oai_resp_message_id = state.oai_resp_message_id; } json to_json_non_oaicompat(); @@ -410,14 +415,17 @@ struct server_task_result_cmpl_partial : server_task_result { std::vector oaicompat_msg_diffs; // to be populated by update() bool is_updated = false; - // for OpenAI Responses API: Events emitted by current chunk - std::vector openai_responses_current_events; + // Streaming state 
copied from task_result_state for this chunk + bool thinking_block_started = false; + bool text_block_started = false; + + // for OpenAI Responses API + std::string oai_resp_reasoning_id; + std::string oai_resp_message_id; + std::string oai_resp_fc_id; // for Anthropic API: track if any reasoning content has been generated bool anthropic_has_reasoning = false; - // Streaming state copied from task_result_state for this chunk - bool anthropic_thinking_block_started = false; - bool anthropic_text_block_started = false; virtual bool is_stop() override { return false; // in stream mode, partial responses are not considered stop @@ -433,6 +441,8 @@ struct server_task_result_cmpl_partial : server_task_result { json to_json_oaicompat_chat(); + json to_json_oaicompat_resp(); + json to_json_anthropic(); }; From 5e1f65c01b42f3792a19ad42d589259cd1963039 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Tue, 20 Jan 2026 06:48:31 +0000 Subject: [PATCH 18/23] catch up with upstream/master --- tools/server/server-context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index 3e3b230591..9a828e1eff 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -3588,7 +3588,7 @@ void server_routes::init_routes() { json body = convert_responses_to_chatcmpl(json::parse(req.body)); json body_parsed = oaicompat_chat_params_parse( body, - ctx_server.oai_parser_opt, + meta->chat_params, files); return handle_completions_impl( req, From 951fe420a93fe28665fc6666e11bc5250340e2d4 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Tue, 20 Jan 2026 07:03:10 +0000 Subject: [PATCH 19/23] Fix style - "type" is the first item of SSE data --- tools/server/server-task.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index 75ec0f4074..dfc5a9b342 100644 --- a/tools/server/server-task.cpp +++ 
b/tools/server/server-task.cpp @@ -1454,9 +1454,9 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() { events.push_back(json { {"event", "response.reasoning_text.delta"}, {"data", json { + {"type", "response.reasoning_text.delta"}, {"delta", diff.reasoning_content_delta}, {"item_id", oai_resp_reasoning_id}, - {"type", "response.reasoning_text.delta"}, }}, }); } @@ -1520,9 +1520,9 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() { events.push_back(json { {"event", "response.function_call_arguments.delta"}, {"data", json { + {"type", "response.function_call_arguments.delta"}, {"delta", diff.tool_call_delta.arguments}, {"item_id", "fc_" + oai_resp_fc_id}, - {"type", "response.function_call_arguments.delta"}, }}, }); } From ebb643863d2aae0448b165a68173d564e47dd942 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Tue, 20 Jan 2026 13:48:12 +0000 Subject: [PATCH 20/23] Explicitly check "instructions" from response_body --- tools/server/server-common.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index f60cb3e285..d81cb85f42 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1082,11 +1082,10 @@ json convert_responses_to_chatcmpl(const json & response_body) { chatcmpl_body.erase("input"); std::vector chatcmpl_messages; - const std::string instructions = json_value(response_body, "instructions", std::string()); - if (instructions != "") { + if (response_body.contains("instructions")) { chatcmpl_messages.push_back({ {"role", "system"}, - {"content", instructions}, + {"content", json_value(response_body, "instructions", std::string())}, }); chatcmpl_body.erase("instructions"); } From cf83e1abcce136b4f9adcbaa9a4fc59fe25de2d3 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Tue, 20 Jan 2026 13:48:39 +0000 Subject: [PATCH 21/23] Make lambdas static --- tools/server/server-common.cpp | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index d81cb85f42..ccf90329b3 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1099,10 +1099,10 @@ json convert_responses_to_chatcmpl(const json & response_body) { } else if (input_value.is_array()) { // #responses_create-input-input_item_list - const auto exists_and_is_array = [](const json & j, const char * key) -> bool { + static auto exists_and_is_array = [](const json & j, const char * key) -> bool { return j.contains(key) && j.at(key).is_array(); }; - const auto exists_and_is_string = [](const json & j, const char * key) -> bool { + static auto exists_and_is_string = [](const json & j, const char * key) -> bool { return j.contains(key) && j.at(key).is_string(); }; From 0d5e3deedb210a84465c9cca8c584f428eb8e393 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Tue, 20 Jan 2026 13:48:58 +0000 Subject: [PATCH 22/23] Check if reasoning content exists --- tools/server/server-common.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index ccf90329b3..ebf0c2ef8c 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1273,6 +1273,16 @@ json convert_responses_to_chatcmpl(const json & response_body) { item.at("type") == "reasoning") { // #responses_create-input-input_item_list-item-reasoning + if (!exists_and_is_array(item, "content")) { + throw std::invalid_argument("item['content'] is not an array"); + } + if (item.at("content").empty()) { + throw std::invalid_argument("item['content'] is empty"); + } + if (!exists_and_is_string(item.at("content")[0], "text")) { + throw std::invalid_argument("item['content']['text'] is not a string"); + } + // Pack reasoning content in dummy message chatcmpl_messages.push_back(json { {"role", "assistant"}, From 5ac23d2f8f00451080cd774f378e36c6e7e88fdc Mon Sep 17 00:00:00 2001 From: 
openingnow <> Date: Tue, 20 Jan 2026 13:47:07 +0000 Subject: [PATCH 23/23] Add `oai_resp_id` to task_result_state(also initialized at ctor), server_task_result_cmpl_partial, and server_task_result_cmpl_final --- tools/server/server-task.cpp | 18 +++++++++--------- tools/server/server-task.h | 15 +++++++++++---- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index dfc5a9b342..bbe49ad2aa 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -857,7 +857,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp() { json res = { {"completed_at", t}, {"created_at", t}, - {"id", "resp_" + oaicompat_cmpl_id.substr(9)}, + {"id", oai_resp_id}, {"model", oaicompat_model}, {"object", "response"}, {"output", output}, @@ -965,7 +965,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { {"data", json { {"type", "response.completed"}, {"response", json { - {"id", "resp_" + oaicompat_cmpl_id.substr(9)}, + {"id", oai_resp_id}, {"object", "response"}, {"created_at", t}, {"status", "completed"}, @@ -1245,11 +1245,12 @@ void server_task_result_cmpl_partial::update(task_result_state & state) { // Copy current state for use in to_json_*() (reflects state BEFORE this chunk) thinking_block_started = state.thinking_block_started; - text_block_started = state.text_block_started; + text_block_started = state.text_block_started; - oai_resp_reasoning_id = state.oai_resp_reasoning_id; - oai_resp_message_id = state.oai_resp_message_id; - oai_resp_fc_id = state.oai_resp_fc_id; + oai_resp_id = state.oai_resp_id; + oai_resp_reasoning_id = state.oai_resp_reasoning_id; + oai_resp_message_id = state.oai_resp_message_id; + oai_resp_fc_id = state.oai_resp_fc_id; // track if the accumulated message has any reasoning content anthropic_has_reasoning = !state.chat_msg.reasoning_content.empty(); @@ -1407,13 +1408,12 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() 
{ std::vector events; if (n_decoded == 1) { - const std::string response_id = "resp_" + oaicompat_cmpl_id.substr(9); events.push_back(json { {"event", "response.created"}, {"data", json { {"type", "response.created"}, {"response", json { - {"id", response_id}, + {"id", oai_resp_id}, {"object", "response"}, {"status", "in_progress"}, }}, @@ -1424,7 +1424,7 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() { {"data", json { {"type", "response.in_progress"}, {"response", json { - {"id", response_id}, + {"id", oai_resp_id}, {"object", "response"}, {"status", "in_progress"}, }}, diff --git a/tools/server/server-task.h b/tools/server/server-task.h index 471f51df9a..244470596b 100644 --- a/tools/server/server-task.h +++ b/tools/server/server-task.h @@ -105,12 +105,16 @@ struct task_result_state { bool text_block_started = false; // for OpenAI Responses streaming API - const std::string oai_resp_reasoning_id = "rs_" + random_string(); - const std::string oai_resp_message_id = "msg_" + random_string(); + const std::string oai_resp_id; + const std::string oai_resp_reasoning_id; + const std::string oai_resp_message_id; std::string oai_resp_fc_id; // function call ID for current args delta task_result_state(const common_chat_parser_params & chat_parser_params) - : chat_parser_params(chat_parser_params) {} + : chat_parser_params(chat_parser_params) + , oai_resp_id("resp_" + random_string()) + , oai_resp_reasoning_id("rs_" + random_string()) + , oai_resp_message_id("msg_" + random_string()) {} // parse partial tool calls and update the internal state common_chat_msg update_chat_msg( @@ -360,6 +364,7 @@ struct server_task_result_cmpl_final : server_task_result { bool is_updated = false; // for OpenAI Responses API + std::string oai_resp_id; std::string oai_resp_reasoning_id; std::string oai_resp_message_id; @@ -373,6 +378,7 @@ struct server_task_result_cmpl_final : server_task_result { is_updated = true; oaicompat_msg = state.update_chat_msg(content, false, 
oaicompat_msg_diffs); + oai_resp_id = state.oai_resp_id; oai_resp_reasoning_id = state.oai_resp_reasoning_id; oai_resp_message_id = state.oai_resp_message_id; } @@ -417,9 +423,10 @@ struct server_task_result_cmpl_partial : server_task_result { // Streaming state copied from task_result_state for this chunk bool thinking_block_started = false; - bool text_block_started = false; + bool text_block_started = false; // for OpenAI Responses API + std::string oai_resp_id; std::string oai_resp_reasoning_id; std::string oai_resp_message_id; std::string oai_resp_fc_id;