ggml-org · angt · May 28, 2026 · May 27, 2026 · May 27, 2026 · May 27, 2026
@@ -5,9 +5,9 @@
 
 #include <cpp-httplib/httplib.h>
 
-#include <cstdlib>
 #include <functional>
 #include <future>
+#include <memory>
 #include <string>
 #include <thread>
 
@@ -21,7 +21,7 @@ class server_http_context::Impl {
 };
 
 server_http_context::server_http_context()
-    : pimpl(std::make_unique<server_http_context::Impl>())
+    : pimpl(std::make_unique<Impl>())
 {}
 
 server_http_context::~server_http_context() = default;
@@ -62,7 +62,7 @@ struct gcp_params {
     }
 
     static std::string getenv(const char * name, const std::string & default_value, bool ensure_leading_slash = false) {
-        const char * value = std::getenv(name);
+        const auto * value = std::getenv(name);
         if (value == nullptr || value[0] == '\0') {
             return default_value;
         }
@@ -94,15 +94,15 @@ bool server_http_context::init(const common_params & params) {
     auto & srv = pimpl->srv;
 
 #ifdef CPPHTTPLIB_OPENSSL_SUPPORT
-    if (params.ssl_file_key != "" && params.ssl_file_cert != "") {
+    if (!params.ssl_file_key.empty() && !params.ssl_file_cert.empty()) {
         SRV_INF("running with SSL: key = %s, cert = %s\n", params.ssl_file_key.c_str(), params.ssl_file_cert.c_str());
-        srv.reset(
-            new httplib::SSLServer(params.ssl_file_cert.c_str(), params.ssl_file_key.c_str())
+        srv = std::make_unique<httplib::SSLServer>(
+            params.ssl_file_cert.c_str(), params.ssl_file_key.c_str()
         );
         is_ssl = true;
     } else {
         SRV_INF("%s", "running without SSL\n");
-        srv.reset(new httplib::Server());
+        srv = std::make_unique<httplib::Server>();
     }
 #else
     if (params.ssl_file_key != "" && params.ssl_file_cert != "") {
@@ -150,7 +150,7 @@ bool server_http_context::init(const common_params & params) {
     // set timeouts and change hostname and port
     srv->set_read_timeout (params.timeout_read);
     srv->set_write_timeout(params.timeout_write);
-    srv->set_socket_options([reuse_port = params.reuse_port](socket_t sock) {
+    srv->set_socket_options([reuse_port = params.reuse_port](const socket_t sock) {
         httplib::set_socket_opt(sock, SOL_SOCKET, SO_REUSEADDR, 1);
         if (reuse_port) {
 #ifdef SO_REUSEPORT
@@ -162,8 +162,8 @@ bool server_http_context::init(const common_params & params) {
     });
 
     if (params.api_keys.size() == 1) {
-        auto key = params.api_keys[0];
-        std::string substr = key.substr(std::max((int)(key.length() - 4), 0));
+        const auto key = params.api_keys[0];
+        const std::string substr = key.substr(std::max(static_cast<int>(key.length() - 4), 0));
         SRV_INF("api_keys: ****%s\n", substr.c_str());
     } else if (params.api_keys.size() > 1) {
         SRV_INF("api_keys: %zu keys loaded\n", params.api_keys.size());
@@ -203,7 +203,7 @@ bool server_http_context::init(const common_params & params) {
         }
 
         // remove the "Bearer " prefix if needed
-        std::string prefix = "Bearer ";
+        static std::string prefix = "Bearer ";
         if (req_api_key.substr(0, prefix.size()) == prefix) {
             req_api_key = req_api_key.substr(prefix.size());
         }
@@ -232,11 +232,10 @@ bool server_http_context::init(const common_params & params) {
     };
 
     auto middleware_server_state = [this](const httplib::Request & req, httplib::Response & res) {
-        bool ready = is_ready.load();
-        if (!ready) {
+        if (!is_ready.load()) {
 #if defined(LLAMA_UI_HAS_ASSETS)
-            auto tmp = string_split<std::string>(req.path, '.');
-            if (req.path == "/" || (tmp.size() > 0 && tmp.back() == "html")) {
+            if (const auto tmp = string_split<std::string>(req.path, '.');
+                req.path == "/" || (!tmp.empty() && tmp.back() == "html")) {
                 if (const llama_ui_asset * a = llama_ui_find_asset("loading.html")) {
                     res.status = 503;
                     res.set_content(reinterpret_cast<const char*>(a->data), a->size, "text/html; charset=utf-8");
@@ -284,17 +283,17 @@ bool server_http_context::init(const common_params & params) {
         return httplib::Server::HandlerResponse::Unhandled;
     });
 
-    int n_threads_http = params.n_threads_http;
+    auto n_threads_http = params.n_threads_http;
     if (n_threads_http < 1) {
         // +4 threads for monitoring, health and some threads reserved for MCP and other tasks in the future
-        n_threads_http = std::max(params.n_parallel + 4, (int32_t) std::thread::hardware_concurrency() - 1);
+        n_threads_http = std::max(params.n_parallel + 4, static_cast<int32_t>(std::thread::hardware_concurrency() - 1));
     }
     SRV_INF("using %d threads for HTTP server\n", n_threads_http);
     srv->new_task_queue = [n_threads_http] {
         // spawn n_threads_http fixed thread (always alive), while allow up to 1024 max possible additional threads
         // when n_threads_http is used, server will create new "dynamic" threads that will be destroyed after processing each request
         // ref: https://github.com/yhirose/cpp-httplib/pull/2368
-        size_t max_threads = (size_t)n_threads_http + 1024;
+        const auto max_threads = static_cast<size_t>(n_threads_http + 1024);
         return new httplib::ThreadPool(n_threads_http, max_threads);
     };
 
@@ -310,10 +309,9 @@ bool server_http_context::init(const common_params & params) {
         // register static assets routes
         if (!params.public_path.empty()) {
             // Set the base directory for serving static files
-            bool is_found = srv->set_mount_point(params.api_prefix + "/", params.public_path);
-            if (!is_found) {
+            if (const auto is_found = srv->set_mount_point(params.api_prefix + "/", params.public_path); !is_found) {
                 SRV_ERR("static assets path not found: %s\n", params.public_path.c_str());
-                return 1;
+                return false;
             }
         } else {
 #if defined(LLAMA_UI_HAS_ASSETS)
@@ -346,9 +344,9 @@ bool server_http_context::init(const common_params & params) {
 bool server_http_context::start() {
     // Bind and listen
 
-    auto & srv = pimpl->srv;
-    bool was_bound = false;
-    bool is_sock = false;
+    const auto & srv = pimpl->srv;
+    auto was_bound = false;
+    auto is_sock = false;
     if (string_ends_with(std::string(hostname), ".sock")) {
         is_sock = true;
         SRV_INF("%s", "setting address family to AF_UNIX\n");
@@ -360,7 +358,7 @@ bool server_http_context::start() {
         SRV_INF("%s", "binding port with default address family\n");
         // bind HTTP listen port
         if (port == 0) {
-            int bound_port = srv->bind_to_any_port(hostname);
+            const auto bound_port = srv->bind_to_any_port(hostname);
             was_bound = (bound_port >= 0);
             if (was_bound) {
                 port = bound_port;
@@ -376,7 +374,7 @@ bool server_http_context::start() {
     }
 
     // run the HTTP server in a thread
-    thread = std::thread([this]() { pimpl->srv->listen_after_bind(); });
+    thread = std::thread([this] { pimpl->srv->listen_after_bind(); });
     srv->wait_until_ready();
 
     listening_address = is_sock ? string_format("unix://%s", hostname.c_str())
@@ -433,13 +431,13 @@ static void process_handler_response(server_http_req_ptr && request, server_http
     if (response->is_stream()) {
         res.status = response->status;
         set_headers(res, response->headers);
-        std::string content_type = response->content_type;
+        const std::string content_type = response->content_type;
         // convert to shared_ptr as both chunked_content_provider() and on_complete() need to use it
-        std::shared_ptr<server_http_req> q_ptr = std::move(request);
-        std::shared_ptr<server_http_res> r_ptr = std::move(response);
-        const auto chunked_content_provider = [response = r_ptr](size_t, httplib::DataSink & sink) -> bool {
+        std::shared_ptr q_ptr = std::move(request);
+        std::shared_ptr r_ptr = std::move(response);
+        const auto chunked_content_provider = [response = r_ptr](size_t, const httplib::DataSink & sink) -> bool {
             std::string chunk;
-            bool has_next = response->next(chunk);
+            const bool has_next = response->next(chunk);
             if (!chunk.empty()) {
                 if (!sink.write(chunk.data(), chunk.size())) {
                     return false;
@@ -550,7 +548,7 @@ static std::string path_to_gcp_format(const std::string & path) {
         if (c == '/' || c == '-' || c == '_') {
             cap = true;
         } else {
-            result += cap ? (char)std::toupper(c) : (char)c;
+            result += static_cast<char>(cap ? std::toupper(c) : c);
             cap = false;
         }
     }
@@ -574,7 +572,7 @@ static json parse_gcp_predict_response(const server_http_res_ptr & res) {
     }
 }
 
-void server_http_context::register_gcp_compat() {
+void server_http_context::register_gcp_compat() const {
     const gcp_params gcp;
 
     if (!gcp.enabled) {
@@ -595,7 +593,7 @@ void server_http_context::register_gcp_compat() {
     }
 
     if (!gcp.path_health.empty()) {
-        auto health_handler = handlers.find("/health");
+        const auto health_handler = handlers.find("/health");
         GGML_ASSERT(health_handler != handlers.end());
         get(gcp.path_health, health_handler->second);
     }

@@ -73,7 +73,7 @@ struct server_http_context {
 
     std::string path_prefix;
     std::string hostname;
-    int port;
+    int port    = 8080;
     bool is_ssl = false;
 
     server_http_context();
@@ -88,7 +88,7 @@ struct server_http_context {
 
     // Register the Google Cloud Platform (Vertex AI) compat (AIP_PREDICT_ROUTE env var, or /predict)
     // Must be called AFTER all other API routes are registered
-    void register_gcp_compat();
+    void register_gcp_compat() const;
 
     // for debugging
     std::string listening_address;