Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion requirements/requirements-tool_bench.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ pytest~=8.3.3
huggingface_hub>=0.34.0,<1.0
matplotlib~=3.10.0
numpy~=1.26.4
openai~=1.55.3
openai~=2.14.0
pandas~=2.2.3
prometheus-client~=0.20.0
requests~=2.32.3
Expand Down
127 changes: 127 additions & 0 deletions tools/server/server-common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1070,6 +1070,115 @@ json oaicompat_chat_params_parse(
return llama_params;
}

json convert_responses_to_chatcmpl(const json & body) {
    // Convert an OpenAI Responses API request body into an OpenAI Chat
    // Completions API request body.
    //
    // - 'instructions' becomes a leading system message
    // - a string 'input' becomes a single user message
    // - an array 'input' has each message's content parts translated
    //   (input_text -> text, input_image -> image_url, input_file -> file)
    // - 'max_output_tokens' is renamed to 'max_tokens'
    //
    // Throws std::invalid_argument on missing/malformed/unsupported fields.
    if (!body.contains("input")) {
        throw std::invalid_argument("'input' is required");
    }
    if (!json_value(body, "previous_response_id", std::string{}).empty()) {
        throw std::invalid_argument("llama.cpp does not support 'previous_response_id'.");
    }

    // bind by reference — avoids deep-copying the (potentially large) input JSON
    const json & input_value = body.at("input");
    json chatcmpl_messages = json::array();

    const std::string instructions = json_value(body, "instructions", std::string());
    if (!instructions.empty()) {
        chatcmpl_messages.push_back({
            {"role", "system"},
            {"content", instructions},
        });
    }

    if (input_value.is_string()) {
        // shorthand form: a bare string is a single user message
        chatcmpl_messages.push_back({
            {"role", "user"},
            {"content", input_value},
        });
    } else if (input_value.is_array()) {
        for (const auto & input_message : input_value) {
            if (!input_message.contains("content")) {
                throw std::invalid_argument("'content' is required");
            }
            const json & content = input_message.at("content");

            if (content.is_string()) {
                // plain-text content is already chatcmpl-compatible; forwarded as-is
                // NOTE(review): extra keys (e.g. "type": "message") are forwarded
                // too — confirm downstream parsing tolerates them
                chatcmpl_messages.push_back(input_message);
            } else if (content.is_array()) {
                json new_content = json::array();

                for (const auto & input_item : content) {
                    const std::string type = json_value(input_item, "type", std::string());

                    if (type == "input_text") {
                        if (!input_item.contains("text")) {
                            throw std::invalid_argument("'Input text' requires 'text'");
                        }
                        new_content.push_back({
                            {"text", input_item.at("text")},
                            {"type", "text"}
                        });
                    } else if (type == "input_image") {
                        // While `detail` is marked as required,
                        // it has default value("auto") and can be omitted.

                        if (!input_item.contains("image_url")) {
                            throw std::invalid_argument("'image_url' is required");
                        }
                        // Responses carries the URL directly; chatcmpl nests it
                        new_content.push_back({
                            {"image_url", json {{"url", input_item.at("image_url")}}},
                            {"type", "image_url"}
                        });
                    } else if (type == "input_file") {
                        if (input_item.contains("file_url")) {
                            throw std::invalid_argument("'file_url' is not supported");
                        }
                        if (!input_item.contains("file_data") || !input_item.contains("filename")) {
                            throw std::invalid_argument("Both 'file_data' and 'filename' are required");
                        }
                        new_content.push_back({
                            {"file", json {
                                {"file_data", input_item.at("file_data")},
                                {"filename", input_item.at("filename")}}},
                            {"type", "file"}
                        });
                    } else {
                        throw std::invalid_argument("'type' must be one of input_text, input_image, or input_file");
                    }
                }

                json new_input_message = input_message;
                new_input_message["content"] = new_content;

                chatcmpl_messages.push_back(new_input_message);
            } else {
                throw std::invalid_argument("'content' must be a string or array of objects");
            }

            // validate message envelope after content translation (same order
            // as the original checks — any failure aborts the whole request)
            const std::string role = json_value(input_message, "role", std::string());
            if (role != "user" && role != "assistant" && role != "system" && role != "developer") {
                throw std::invalid_argument("'role' must be one of user, assistant, system, or developer");
            }

            if (input_message.contains("type") && input_message.at("type") != "message") {
                throw std::invalid_argument("If 'type' is defined, it should be 'message'");
            }
        }
    } else {
        throw std::invalid_argument("'input' must be a string or array of objects");
    }

    // start from the original body so unrelated fields pass through untouched
    json chatcmpl_body = body;
    chatcmpl_body.erase("input");
    chatcmpl_body["messages"] = std::move(chatcmpl_messages);

    // Responses 'max_output_tokens' == chatcmpl 'max_tokens'
    if (chatcmpl_body.contains("max_output_tokens")) {
        chatcmpl_body["max_tokens"] = chatcmpl_body["max_output_tokens"];
        chatcmpl_body.erase("max_output_tokens");
    }

    return chatcmpl_body;
}

json convert_anthropic_to_oai(const json & body) {
json oai_body;

Expand Down Expand Up @@ -1478,6 +1587,24 @@ std::string format_oai_sse(const json & data) {
return ss.str();
}

std::string format_oai_resp_sse(const json & data) {
    // Serialize one event object — or an array of them — as typed SSE frames:
    //   event: <name>\n
    //   data: <json payload>\n\n
    // Each object must carry an "event" string and a "data" payload.
    std::ostringstream out;

    const auto emit = [&out](const json & ev) {
        out << "event: " << ev.at("event").get<std::string>() << "\n"
            << "data: "  << safe_json_to_str(ev.at("data")) << "\n\n";
    };

    if (!data.is_array()) {
        emit(data);
    } else {
        for (const auto & ev : data) {
            emit(ev);
        }
    }

    return out.str();
}

std::string format_anthropic_sse(const json & data) {
std::ostringstream ss;

Expand Down
5 changes: 5 additions & 0 deletions tools/server/server-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,9 @@ json oaicompat_chat_params_parse(
const oaicompat_parser_options & opt,
std::vector<raw_buffer> & out_files);

// convert OpenAI Responses API format to OpenAI Chat Completions API format
json convert_responses_to_chatcmpl(const json & body);

// convert Anthropic Messages API format to OpenAI Chat Completions API format
json convert_anthropic_to_oai(const json & body);

Expand Down Expand Up @@ -332,6 +335,8 @@ std::string tokens_to_output_formatted_string(const llama_context * ctx, const l
// note: if data is a json array, it will be sent as multiple events, one per item
std::string format_oai_sse(const json & data);

std::string format_oai_resp_sse(const json & data);

// format Anthropic-style SSE with event types
std::string format_anthropic_sse(const json & data);

Expand Down
87 changes: 80 additions & 7 deletions tools/server/server-context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2978,6 +2978,58 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
json first_result_json = first_result->to_json();
if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) {
res->data = format_anthropic_sse(first_result_json);
} else if (res_type == TASK_RESPONSE_TYPE_OAI_RESP) {
const json created = {
{"event", "response.created"},
{"data", json {
{"type", "response.created"},
{"response", json {
{"object", "response"},
{"status", "in_progress"}
}}
}}
};
const json in_progress = {
{"event", "response.in_progress"},
{"data", json {
{"type", "response.in_progress"},
{"response", json {
{"object", "response"},
{"status", "in_progress"}
}}
}}
};
const json output_item_added = {
{"event", "response.output_item.added"},
{"data", json {
{"type", "response.output_item.added"},
{"item", json {
{"type", "message"},
{"status", "in_progress"},
{"content", json::array()},
{"role", "assistant"}
}}
}}
};
const json content_part_added = {
{"event", "response.content_part.added"},
{"data", json {
{"type", "response.content_part.added"},
{"part", json {
{"type", "output_text"},
{"text", ""}
}}
}}
};

const json initial_events = json::array({
created,
in_progress,
output_item_added,
content_part_added
});

res->data = format_oai_resp_sse(initial_events) + format_oai_resp_sse(first_result_json);
} else {
res->data = format_oai_sse(first_result_json);
}
Expand Down Expand Up @@ -3012,13 +3064,16 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(

// check if there is more data
if (!rd.has_next()) {
if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) {
// Anthropic doesn't send [DONE], message_stop was already sent
output = "";
} else if (res_type != TASK_RESPONSE_TYPE_NONE) {
output = "data: [DONE]\n\n";
} else {
output = "";
switch (res_type) {
case TASK_RESPONSE_TYPE_NONE:
case TASK_RESPONSE_TYPE_OAI_RESP:
case TASK_RESPONSE_TYPE_ANTHROPIC:
output = "";
break;

default:
output = "data: [DONE]\n\n";
break;
}
SRV_DBG("%s", "all results received, terminating stream\n");
return false; // no more data, terminate
Expand All @@ -3045,6 +3100,8 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
json res_json = result->to_json();
if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) {
output = format_anthropic_sse(res_json);
} else if (res_type == TASK_RESPONSE_TYPE_OAI_RESP) {
output = format_oai_resp_sse(res_json);
} else {
output = format_oai_sse(res_json);
}
Expand Down Expand Up @@ -3465,6 +3522,22 @@ void server_routes::init_routes() {
TASK_RESPONSE_TYPE_OAI_CHAT);
};

// POST handler for the OpenAI Responses API endpoint: the request body is
// translated to Chat Completions format, parsed with the standard chatcmpl
// parser, and served by the shared completions path with Responses-style
// SSE framing (TASK_RESPONSE_TYPE_OAI_RESP).
this->post_responses_oai = [this](const server_http_req & req) {
// NOTE(review): `res` is never used below — confirm whether create_response()
// has required side effects, otherwise this line looks removable.
auto res = create_response();
std::vector<raw_buffer> files;
// convert Responses -> Chat Completions first; both steps may throw
// std::invalid_argument on malformed input
json body = convert_responses_to_chatcmpl(json::parse(req.body));
// may extract embedded media (images/files) into `files`
json body_parsed = oaicompat_chat_params_parse(
body,
ctx_server.oai_parser_opt,
files);
return handle_completions_impl(
req,
SERVER_TASK_TYPE_COMPLETION,
body_parsed,
files,
TASK_RESPONSE_TYPE_OAI_RESP);
};

this->post_anthropic_messages = [this](const server_http_req & req) {
auto res = create_response();
std::vector<raw_buffer> files;
Expand Down
1 change: 1 addition & 0 deletions tools/server/server-context.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ struct server_routes {
server_http_context::handler_t post_completions;
server_http_context::handler_t post_completions_oai;
server_http_context::handler_t post_chat_completions;
server_http_context::handler_t post_responses_oai;
server_http_context::handler_t post_anthropic_messages;
server_http_context::handler_t post_anthropic_count_tokens;
server_http_context::handler_t post_apply_template;
Expand Down
Loading