Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 32 additions & 34 deletions tools/server/server-http.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@

#include <cpp-httplib/httplib.h>

#include <cstdlib>
#include <functional>
#include <future>
#include <memory>
#include <string>
#include <thread>

Expand All @@ -21,7 +21,7 @@ class server_http_context::Impl {
};

server_http_context::server_http_context()
: pimpl(std::make_unique<server_http_context::Impl>())
: pimpl(std::make_unique<Impl>())
{}

server_http_context::~server_http_context() = default;
Expand Down Expand Up @@ -62,7 +62,7 @@ struct gcp_params {
}

static std::string getenv(const char * name, const std::string & default_value, bool ensure_leading_slash = false) {
const char * value = std::getenv(name);
const auto * value = std::getenv(name);
if (value == nullptr || value[0] == '\0') {
return default_value;
}
Expand Down Expand Up @@ -94,15 +94,15 @@ bool server_http_context::init(const common_params & params) {
auto & srv = pimpl->srv;

#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
if (params.ssl_file_key != "" && params.ssl_file_cert != "") {
if (!params.ssl_file_key.empty() && !params.ssl_file_cert.empty()) {
SRV_INF("running with SSL: key = %s, cert = %s\n", params.ssl_file_key.c_str(), params.ssl_file_cert.c_str());
srv.reset(
new httplib::SSLServer(params.ssl_file_cert.c_str(), params.ssl_file_key.c_str())
srv = std::make_unique<httplib::SSLServer>(
params.ssl_file_cert.c_str(), params.ssl_file_key.c_str()
);
is_ssl = true;
} else {
SRV_INF("%s", "running without SSL\n");
srv.reset(new httplib::Server());
srv = std::make_unique<httplib::Server>();
}
#else
if (params.ssl_file_key != "" && params.ssl_file_cert != "") {
Expand Down Expand Up @@ -150,7 +150,7 @@ bool server_http_context::init(const common_params & params) {
// set timeouts and change hostname and port
srv->set_read_timeout (params.timeout_read);
srv->set_write_timeout(params.timeout_write);
srv->set_socket_options([reuse_port = params.reuse_port](socket_t sock) {
srv->set_socket_options([reuse_port = params.reuse_port](const socket_t sock) {
httplib::set_socket_opt(sock, SOL_SOCKET, SO_REUSEADDR, 1);
if (reuse_port) {
#ifdef SO_REUSEPORT
Expand All @@ -162,8 +162,8 @@ bool server_http_context::init(const common_params & params) {
});

if (params.api_keys.size() == 1) {
auto key = params.api_keys[0];
std::string substr = key.substr(std::max((int)(key.length() - 4), 0));
const auto key = params.api_keys[0];
const std::string substr = key.substr(std::max(static_cast<int>(key.length() - 4), 0));
SRV_INF("api_keys: ****%s\n", substr.c_str());
} else if (params.api_keys.size() > 1) {
SRV_INF("api_keys: %zu keys loaded\n", params.api_keys.size());
Expand Down Expand Up @@ -203,7 +203,7 @@ bool server_http_context::init(const common_params & params) {
}

// remove the "Bearer " prefix if needed
std::string prefix = "Bearer ";
static std::string prefix = "Bearer ";
if (req_api_key.substr(0, prefix.size()) == prefix) {
req_api_key = req_api_key.substr(prefix.size());
}
Expand Down Expand Up @@ -232,11 +232,10 @@ bool server_http_context::init(const common_params & params) {
};

auto middleware_server_state = [this](const httplib::Request & req, httplib::Response & res) {
bool ready = is_ready.load();
if (!ready) {
if (!is_ready.load()) {
#if defined(LLAMA_UI_HAS_ASSETS)
auto tmp = string_split<std::string>(req.path, '.');
if (req.path == "/" || (tmp.size() > 0 && tmp.back() == "html")) {
if (const auto tmp = string_split<std::string>(req.path, '.');
req.path == "/" || (!tmp.empty() && tmp.back() == "html")) {
if (const llama_ui_asset * a = llama_ui_find_asset("loading.html")) {
res.status = 503;
res.set_content(reinterpret_cast<const char*>(a->data), a->size, "text/html; charset=utf-8");
Expand Down Expand Up @@ -284,17 +283,17 @@ bool server_http_context::init(const common_params & params) {
return httplib::Server::HandlerResponse::Unhandled;
});

int n_threads_http = params.n_threads_http;
auto n_threads_http = params.n_threads_http;
if (n_threads_http < 1) {
// +4 threads for monitoring, health and some threads reserved for MCP and other tasks in the future
n_threads_http = std::max(params.n_parallel + 4, (int32_t) std::thread::hardware_concurrency() - 1);
n_threads_http = std::max(params.n_parallel + 4, static_cast<int32_t>(std::thread::hardware_concurrency() - 1));
}
SRV_INF("using %d threads for HTTP server\n", n_threads_http);
srv->new_task_queue = [n_threads_http] {
// spawn n_threads_http fixed thread (always alive), while allow up to 1024 max possible additional threads
// when n_threads_http is used, server will create new "dynamic" threads that will be destroyed after processing each request
// ref: https://github.com/yhirose/cpp-httplib/pull/2368
size_t max_threads = (size_t)n_threads_http + 1024;
const auto max_threads = static_cast<size_t>(n_threads_http + 1024);
return new httplib::ThreadPool(n_threads_http, max_threads);
};

Expand All @@ -310,10 +309,9 @@ bool server_http_context::init(const common_params & params) {
// register static assets routes
if (!params.public_path.empty()) {
// Set the base directory for serving static files
bool is_found = srv->set_mount_point(params.api_prefix + "/", params.public_path);
if (!is_found) {
if (const auto is_found = srv->set_mount_point(params.api_prefix + "/", params.public_path); !is_found) {
SRV_ERR("static assets path not found: %s\n", params.public_path.c_str());
return 1;
return false;
}
} else {
#if defined(LLAMA_UI_HAS_ASSETS)
Expand Down Expand Up @@ -346,9 +344,9 @@ bool server_http_context::init(const common_params & params) {
bool server_http_context::start() {
// Bind and listen

auto & srv = pimpl->srv;
bool was_bound = false;
bool is_sock = false;
const auto & srv = pimpl->srv;
auto was_bound = false;
auto is_sock = false;
if (string_ends_with(std::string(hostname), ".sock")) {
is_sock = true;
SRV_INF("%s", "setting address family to AF_UNIX\n");
Expand All @@ -360,7 +358,7 @@ bool server_http_context::start() {
SRV_INF("%s", "binding port with default address family\n");
// bind HTTP listen port
if (port == 0) {
int bound_port = srv->bind_to_any_port(hostname);
const auto bound_port = srv->bind_to_any_port(hostname);
was_bound = (bound_port >= 0);
if (was_bound) {
port = bound_port;
Expand All @@ -376,7 +374,7 @@ bool server_http_context::start() {
}

// run the HTTP server in a thread
thread = std::thread([this]() { pimpl->srv->listen_after_bind(); });
thread = std::thread([this] { pimpl->srv->listen_after_bind(); });
srv->wait_until_ready();

listening_address = is_sock ? string_format("unix://%s", hostname.c_str())
Expand Down Expand Up @@ -433,13 +431,13 @@ static void process_handler_response(server_http_req_ptr && request, server_http
if (response->is_stream()) {
res.status = response->status;
set_headers(res, response->headers);
std::string content_type = response->content_type;
const std::string content_type = response->content_type;
// convert to shared_ptr as both chunked_content_provider() and on_complete() need to use it
std::shared_ptr<server_http_req> q_ptr = std::move(request);
std::shared_ptr<server_http_res> r_ptr = std::move(response);
const auto chunked_content_provider = [response = r_ptr](size_t, httplib::DataSink & sink) -> bool {
std::shared_ptr q_ptr = std::move(request);
std::shared_ptr r_ptr = std::move(response);
const auto chunked_content_provider = [response = r_ptr](size_t, const httplib::DataSink & sink) -> bool {
std::string chunk;
bool has_next = response->next(chunk);
const bool has_next = response->next(chunk);
if (!chunk.empty()) {
if (!sink.write(chunk.data(), chunk.size())) {
return false;
Expand Down Expand Up @@ -550,7 +548,7 @@ static std::string path_to_gcp_format(const std::string & path) {
if (c == '/' || c == '-' || c == '_') {
cap = true;
} else {
result += cap ? (char)std::toupper(c) : (char)c;
result += static_cast<char>(cap ? std::toupper(c) : c);
cap = false;
}
}
Expand All @@ -574,7 +572,7 @@ static json parse_gcp_predict_response(const server_http_res_ptr & res) {
}
}

void server_http_context::register_gcp_compat() {
void server_http_context::register_gcp_compat() const {
const gcp_params gcp;

if (!gcp.enabled) {
Expand All @@ -595,7 +593,7 @@ void server_http_context::register_gcp_compat() {
}

if (!gcp.path_health.empty()) {
auto health_handler = handlers.find("/health");
const auto health_handler = handlers.find("/health");
GGML_ASSERT(health_handler != handlers.end());
get(gcp.path_health, health_handler->second);
}
Expand Down
4 changes: 2 additions & 2 deletions tools/server/server-http.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ struct server_http_context {

std::string path_prefix;
std::string hostname;
int port;
int port = 8080;
bool is_ssl = false;

server_http_context();
Expand All @@ -88,7 +88,7 @@ struct server_http_context {

// Register the Google Cloud Platform (Vertex AI) compat (AIP_PREDICT_ROUTE env var, or /predict)
// Must be called AFTER all other API routes are registered
void register_gcp_compat();
void register_gcp_compat() const;

// for debugging
std::string listening_address;
Expand Down
Loading