Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions common/arg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2734,6 +2734,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
params.public_path = value;
}
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_STATIC_PATH"));
// Register the `--prefix PREFIX` command-line option.
// The supplied value is stored verbatim in `params.server_prefix`; per the help
// text it is expected to be given WITHOUT a trailing slash. No normalization or
// validation of the value happens in this handler.
// NOTE(review): the environment variable is named LLAMA_ARG_SERVER_PATH, which
// does not mention "prefix" like the flag does — confirm the naming is intended.
add_opt(common_arg(
{"--prefix"}, "PREFIX",
// The help text embeds the current value of params.server_prefix as the displayed default.
string_format("prefix path the server serves from, without the trailing slash (default: %s)", params.server_prefix.c_str()),
[](common_params & params, const std::string & value) {
// Stored as-is, with no trailing-slash stripping.
params.server_prefix = value;
}
// Tagged for the server example only, and bound to an environment variable via set_env.
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_SERVER_PATH"));
add_opt(common_arg(
{"--no-webui"},
string_format("Disable the Web UI (default: %s)", params.webui ? "enabled" : "disabled"),
Expand Down
1 change: 1 addition & 0 deletions common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,7 @@ struct common_params {

std::string hostname = "127.0.0.1";
std::string public_path = ""; // NOLINT
std::string server_prefix = ""; // NOLINT
std::string chat_template = ""; // NOLINT
bool use_jinja = false; // NOLINT
bool enable_chat_template = true;
Expand Down
64 changes: 33 additions & 31 deletions tools/server/server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4800,20 +4800,22 @@ int main(int argc, char ** argv) {
// Router
//

const char* server_prefix = params.server_prefix.c_str();

if (!params.webui) {
LOG_INF("Web UI is disabled\n");
} else {
// register static assets routes
if (!params.public_path.empty()) {
// Set the base directory for serving static files
bool is_found = svr->set_mount_point("/", params.public_path);
bool is_found = svr->set_mount_point(string_format("%s/", server_prefix), params.public_path);
if (!is_found) {
LOG_ERR("%s: static assets path not found: %s\n", __func__, params.public_path.c_str());
return 1;
}
} else {
// using embedded static index.html
svr->Get("/", [](const httplib::Request & req, httplib::Response & res) {
svr->Get(string_format("%s/", server_prefix), [](const httplib::Request & req, httplib::Response & res) {
if (req.get_header_value("Accept-Encoding").find("gzip") == std::string::npos) {
res.set_content("Error: gzip is not supported by this browser", "text/plain");
} else {
Expand All @@ -4829,37 +4831,37 @@ int main(int argc, char ** argv) {
}

// register API routes
svr->Get ("/health", handle_health); // public endpoint (no API key check)
svr->Get ("/metrics", handle_metrics);
svr->Get ("/props", handle_props);
svr->Post("/props", handle_props_change);
svr->Post("/api/show", handle_api_show);
svr->Get ("/models", handle_models); // public endpoint (no API key check)
svr->Get ("/v1/models", handle_models); // public endpoint (no API key check)
svr->Get ("/api/tags", handle_models); // ollama specific endpoint. public endpoint (no API key check)
svr->Post("/completion", handle_completions); // legacy
svr->Post("/completions", handle_completions);
svr->Post("/v1/completions", handle_completions_oai);
svr->Post("/chat/completions", handle_chat_completions);
svr->Post("/v1/chat/completions", handle_chat_completions);
svr->Post("/api/chat", handle_chat_completions); // ollama specific endpoint
svr->Post("/infill", handle_infill);
svr->Post("/embedding", handle_embeddings); // legacy
svr->Post("/embeddings", handle_embeddings);
svr->Post("/v1/embeddings", handle_embeddings_oai);
svr->Post("/rerank", handle_rerank);
svr->Post("/reranking", handle_rerank);
svr->Post("/v1/rerank", handle_rerank);
svr->Post("/v1/reranking", handle_rerank);
svr->Post("/tokenize", handle_tokenize);
svr->Post("/detokenize", handle_detokenize);
svr->Post("/apply-template", handle_apply_template);
svr->Get (string_format("%s/health", server_prefix), handle_health); // public endpoint (no API key check)
svr->Get (string_format("%s/metrics", server_prefix), handle_metrics);
svr->Get (string_format("%s/props", server_prefix), handle_props);
svr->Post(string_format("%s/props", server_prefix), handle_props_change);
svr->Post(string_format("%s/api/show", server_prefix), handle_api_show);
svr->Get (string_format("%s/models", server_prefix), handle_models); // public endpoint (no API key check)
svr->Get (string_format("%s/v1/models", server_prefix), handle_models); // public endpoint (no API key check)
svr->Get (string_format("%s/api/tags", server_prefix), handle_models); // ollama specific endpoint. public endpoint (no API key check)
svr->Post(string_format("%s/completion", server_prefix), handle_completions); // legacy
svr->Post(string_format("%s/completions", server_prefix), handle_completions);
svr->Post(string_format("%s/v1/completions", server_prefix), handle_completions_oai);
svr->Post(string_format("%s/chat/completions", server_prefix), handle_chat_completions);
svr->Post(string_format("%s/v1/chat/completions", server_prefix), handle_chat_completions);
svr->Post(string_format("%s/api/chat", server_prefix), handle_chat_completions); // ollama specific endpoint
svr->Post(string_format("%s/infill", server_prefix), handle_infill);
svr->Post(string_format("%s/embedding", server_prefix), handle_embeddings); // legacy
svr->Post(string_format("%s/embeddings", server_prefix), handle_embeddings);
svr->Post(string_format("%s/v1/embeddings", server_prefix), handle_embeddings_oai);
svr->Post(string_format("%s/rerank", server_prefix), handle_rerank);
svr->Post(string_format("%s/reranking", server_prefix), handle_rerank);
svr->Post(string_format("%s/v1/rerank", server_prefix), handle_rerank);
svr->Post(string_format("%s/v1/reranking", server_prefix), handle_rerank);
svr->Post(string_format("%s/tokenize", server_prefix), handle_tokenize);
svr->Post(string_format("%s/detokenize", server_prefix), handle_detokenize);
svr->Post(string_format("%s/apply-template", server_prefix), handle_apply_template);
// LoRA adapters hotswap
svr->Get ("/lora-adapters", handle_lora_adapters_list);
svr->Post("/lora-adapters", handle_lora_adapters_apply);
svr->Get (string_format("%s/lora-adapters", server_prefix), handle_lora_adapters_list);
svr->Post(string_format("%s/lora-adapters", server_prefix), handle_lora_adapters_apply);
// Save & load slots
svr->Get ("/slots", handle_slots);
svr->Post("/slots/:id_slot", handle_slots_action);
svr->Get (string_format("%s/slots", server_prefix), handle_slots);
svr->Post(string_format("%s/slots/:id_slot", server_prefix), handle_slots_action);

//
// Start the server
Expand Down
Loading