12 changes: 11 additions & 1 deletion common/arg.cpp
@@ -284,14 +284,17 @@ static handle_model_result common_params_handle_model(
model.path = common_docker_resolve_model(model.docker_repo);
model.name = model.docker_repo; // set name for consistency
} else if (!model.hf_repo.empty()) {
// use hf_repo as name if not already set
if (model.name.empty()) {
model.name = model.hf_repo;
}
// short-hand to avoid specifying --hf-file -> default it to --model
if (model.hf_file.empty()) {
if (model.path.empty()) {
auto auto_detected = common_get_hf_file(model.hf_repo, bearer_token, offline);
if (auto_detected.repo.empty() || auto_detected.ggufFile.empty()) {
exit(1); // built without CURL, error message already printed
}
model.name = model.hf_repo; // repo name with tag
model.hf_repo = auto_detected.repo; // repo name without tag
model.hf_file = auto_detected.ggufFile;
if (!auto_detected.mmprojFile.empty()) {
@@ -2697,6 +2700,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
params.ssl_file_cert = value;
}
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_SSL_CERT_FILE"));
add_opt(common_arg(
{"--mcp-config"}, "FNAME",
"path to MCP server configuration JSON file",
[](common_params & params, const std::string & value) {
params.mcp_config = value;
}
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MCP_CONFIG"));
add_opt(common_arg(
{"--chat-template-kwargs"}, "STRING",
string_format("sets additional params for the json template parser"),
3 changes: 3 additions & 0 deletions common/common.h
@@ -485,6 +485,9 @@ struct common_params {

std::map<std::string, std::string> default_template_kwargs;

// MCP config
std::string mcp_config = ""; // NOLINT

// webui configs
bool webui = true;
std::string webui_config_json;
6 changes: 6 additions & 0 deletions tools/server/CMakeLists.txt
@@ -46,6 +46,12 @@ set(TARGET_SRCS
server-common.h
server-context.cpp
server-context.h
server-ws.cpp
server-ws.h
server-mproc.cpp
server-mproc.h
server-mcp-bridge.cpp
server-mcp-bridge.h
)
set(PUBLIC_ASSETS
index.html.gz
71 changes: 71 additions & 0 deletions tools/server/README.md
@@ -1679,6 +1679,77 @@ Apart from error types supported by OAI, we also have custom types that are spec
}
```

### MCP (Model Context Protocol) Support

The server supports [MCP](https://modelcontextprotocol.io/) for integrating external tools via WebSocket. MCP enables models to interact with external services like file systems, databases, APIs, and more.

#### MCP Configuration

Create an MCP configuration file (JSON format):

```json
{
"mcpServers": {
"filesystem": {
"command": "npx",
"args": ["-y", "@modelcontextprotocol/server-filesystem", "/path/to/allowed/dir"],
"env": {}
},
"brave-search": {
"command": "npx",
      "args": ["-y", "@modelcontextprotocol/server-brave-search"],
"env": {
"BRAVE_API_KEY": "your-api-key"
}
}
}
}
```

#### MCP Configuration Location

The server looks for MCP configuration in the following order:
1. `--mcp-config` command-line argument
2. `LLAMA_MCP_CONFIG` environment variable
3. `~/.llama.cpp/mcp.json` (Linux/macOS)
4. `%APPDATA%/llama.cpp/mcp.json` (Windows)

#### MCP Usage

```bash
# Use default config location (~/.llama.cpp/mcp.json)
./llama-server -m model.gguf

# Or specify config path
./llama-server -m model.gguf --mcp-config /path/to/mcp.json

# Or use environment variable
LLAMA_MCP_CONFIG=/path/to/mcp.json ./llama-server -m model.gguf
```

#### MCP WebSocket Port

MCP uses a WebSocket listening on the HTTP port + 1 (i.e. 8081 when HTTP is on the default 8080). The frontend discovers the actual port via the `/mcp/ws-port` endpoint.
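
A client's discovery step might look like the following sketch; the `port` field name in the response is an assumption about the payload shape, so check the actual endpoint output:

```python
import json

def ws_port_from_response(body: str, http_port: int) -> int:
    """Parse a /mcp/ws-port response body; fall back to http_port + 1.

    The 'port' field name is an assumption, not confirmed by the source.
    """
    try:
        return int(json.loads(body)["port"])
    except (ValueError, KeyError, TypeError):
        # unparsable or unexpected payload: use the documented default
        return http_port + 1
```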

#### MCP API Endpoints

| Endpoint | Description |
|----------|-------------|
| `GET /mcp/servers` | List available MCP servers from configuration |
| `GET /mcp/ws-port` | Get the WebSocket port number |
| `WS /mcp?server=<name>` | WebSocket connection (on port+1) |

#### MCP Protocol

The MCP bridge implements JSON-RPC 2.0 over WebSocket. Key methods:
- `initialize` - Establish MCP session
- `tools/list` - List available tools
- `tools/call` - Execute a tool
- `resources/list` - List available resources
- `resources/read` - Read a resource

For more information about MCP, see the [Model Context Protocol documentation](https://modelcontextprotocol.io/).

### Legacy completion web UI

A new chat-based UI has replaced the old completion-based one since [this PR](https://github.com/ggml-org/llama.cpp/pull/10175). If you want to use the old completion UI, start the server with `--path ./tools/server/public_legacy`.
45 changes: 45 additions & 0 deletions tools/server/mcp_config.example.json
@@ -0,0 +1,45 @@
{
"_comment": "Example MCP configuration for llama.cpp",
"_comment_windows": "On Windows, place this file in %APPDATA%\\llama.cpp\\mcp.json",
"_comment_macos": "On macOS/Linux, place this file in ~/.llama.cpp/mcp.json",
"_comment_env": "Or set the LLAMA_MCP_CONFIG environment variable to point to this file",
"mcpServers": {
"filesystem": {
"command": "npx",
"args": [
"-y",
"@modelcontextprotocol/server-filesystem",
"/allowed/path"
]
},
"brave-search": {
"command": "npx",
"args": [
"-y",
"@modelcontextprotocol/server-brave-search"
],
"env": {
"BRAVE_API_KEY": "your-api-key-here"
}
},
"github": {
"command": "npx",
"args": [
"-y",
"@modelcontextprotocol/server-github"
],
"env": {
"GITHUB_TOKEN": "your-github-token-here"
}
},
"_comment_cwd_example": "Example: Run a custom MCP server script from a specific directory",
"my-script": {
"command": "python",
"args": ["server.py"],
"cwd": "/path/to/working/directory",
"env": {
"PYTHONUNBUFFERED": "1"
}
}
}
}
Binary file modified tools/server/public/index.html.gz
6 changes: 5 additions & 1 deletion tools/server/server-http.cpp
@@ -302,10 +302,14 @@ bool server_http_context::start() {
return true;
}

void server_http_context::stop() const {
void server_http_context::stop() {
if (pimpl->srv) {
pimpl->srv->stop();
}
// Wait for server thread to finish
if (thread.joinable()) {
thread.join();
}
}

static void set_headers(httplib::Response & res, const std::map<std::string, std::string> & headers) {
2 changes: 1 addition & 1 deletion tools/server/server-http.h
@@ -65,7 +65,7 @@ struct server_http_context {

bool init(const common_params & params);
bool start();
void stop() const;
void stop();

// note: the handler should never throw exceptions
using handler_t = std::function<server_http_res_ptr(const server_http_req & req)>;