Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ jobs:
-DCMAKE_BUILD_RPATH="@loader_path" \
-DLLAMA_FATAL_WARNINGS=ON \
-DLLAMA_CURL=OFF \
-DLLAMA_BUILD_BORINGSSL=ON \
-DLLAMA_BORINGSSL=ON \
-DGGML_METAL_USE_BF16=ON \
-DGGML_METAL_EMBED_LIBRARY=OFF \
-DGGML_METAL_SHADER_DEBUG=ON \
Expand Down Expand Up @@ -119,7 +119,7 @@ jobs:
-DCMAKE_BUILD_RPATH="@loader_path" \
-DLLAMA_FATAL_WARNINGS=ON \
-DLLAMA_CURL=OFF \
-DLLAMA_BUILD_BORINGSSL=ON \
-DLLAMA_BORINGSSL=ON \
-DGGML_METAL=OFF \
-DGGML_RPC=ON \
-DCMAKE_OSX_DEPLOYMENT_TARGET=13.3
Expand Down Expand Up @@ -1043,7 +1043,7 @@ jobs:
id: cmake_build
run: |
cmake -S . -B build ${{ matrix.defines }} `
-DLLAMA_CURL=OFF -DLLAMA_BUILD_BORINGSSL=ON
-DLLAMA_CURL=OFF -DLLAMA_BORINGSSL=ON
cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS}

- name: Add libopenblas.dll
Expand Down Expand Up @@ -1151,7 +1151,7 @@ jobs:
cmake -S . -B build -G "Ninja Multi-Config" ^
-DLLAMA_BUILD_SERVER=ON ^
-DLLAMA_CURL=OFF ^
-DLLAMA_BUILD_BORINGSSL=ON ^
-DLLAMA_BORINGSSL=ON ^
-DGGML_NATIVE=OFF ^
-DGGML_BACKEND_DL=ON ^
-DGGML_CPU_ALL_VARIANTS=ON ^
Expand Down Expand Up @@ -1259,7 +1259,7 @@ jobs:
-DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/opt/rocm-${{ env.ROCM_VERSION }}/include/" `
-DCMAKE_BUILD_TYPE=Release `
-DLLAMA_CURL=OFF `
-DLLAMA_BUILD_BORINGSSL=ON `
-DLLAMA_BORINGSSL=ON `
-DROCM_DIR="${env:HIP_PATH}" `
-DGGML_HIP=ON `
-DGGML_HIP_ROCWMMA_FATTN=ON `
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/server.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ jobs:
- name: Build
id: cmake_build
run: |
cmake -B build -DLLAMA_CURL=OFF -DLLAMA_BUILD_BORINGSSL=ON
cmake -B build -DLLAMA_CURL=OFF -DLLAMA_BORINGSSL=ON
cmake --build build --config ${{ matrix.build_type }} -j ${env:NUMBER_OF_PROCESSORS} --target llama-server

- name: Python setup
Expand Down Expand Up @@ -108,7 +108,7 @@ jobs:
- name: Build
id: cmake_build
run: |
cmake -B build -DLLAMA_CURL=OFF -DLLAMA_BUILD_BORINGSSL=ON
cmake -B build -DLLAMA_CURL=OFF -DLLAMA_BORINGSSL=ON
cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target llama-server

- name: Python setup
Expand Down
6 changes: 4 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,11 @@ option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
option(LLAMA_TOOLS_INSTALL "llama: install tools" ${LLAMA_TOOLS_INSTALL_DEFAULT})

# 3rd party libs
option(LLAMA_CURL "llama: use libcurl to download model from an URL" ON)
option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
option(LLAMA_HTTPLIB "llama: if libcurl is disabled, use httplib to download model from an URL" ON)
option(LLAMA_OPENSSL "llama: use openssl to support HTTPS" OFF)
option(LLAMA_BORINGSSL "llama: use boringssl to support HTTPS" ON)
option(LLAMA_LIBRESSL "llama: use libressl to support HTTPS" OFF)
option(LLAMA_OPENSSL "llama: use openssl to support HTTPS" OFF)
option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)

# Required for relocatable CMake package
Expand Down
21 changes: 19 additions & 2 deletions tools/server/server-models.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1000,10 +1000,19 @@ server_http_proxy::server_http_proxy(
int32_t timeout_write
) {
// shared between reader and writer threads
auto cli = std::make_shared<httplib::Client>(host, port);
auto cli = std::make_shared<httplib::ClientImpl>(host, port);
auto pipe = std::make_shared<pipe_t<msg_t>>();

if (port == 443) {
#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
cli.reset(new httplib::SSLClient(host, port));
#else
throw std::runtime_error("HTTPS requested but CPPHTTPLIB_OPENSSL_SUPPORT is not defined");
#endif
}

// setup Client
cli->set_follow_location(true);
cli->set_connection_timeout(0, 200000); // 200 milliseconds
cli->set_write_timeout(timeout_read, 0); // reversed for cli (client) vs srv (server)
cli->set_read_timeout(timeout_write, 0);
Expand Down Expand Up @@ -1053,7 +1062,15 @@ server_http_proxy::server_http_proxy(
req.method = method;
req.path = path;
for (const auto & [key, value] : headers) {
req.set_header(key, value);
if (key == "Accept-Encoding") {
// disable Accept-Encoding to avoid compressed responses
continue;
}
if (key == "Host" || key == "host") {
req.set_header(key, host);
} else {
req.set_header(key, value);
}
}
req.body = body;
req.response_handler = response_handler;
Expand Down
50 changes: 48 additions & 2 deletions tools/server/server-models.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "common.h"
#include "preset.h"
#include "http.h"
#include "server-common.h"
#include "server-http.h"

Expand Down Expand Up @@ -184,8 +185,8 @@ struct server_http_proxy : server_http_res {
const std::map<std::string, std::string> & headers,
const std::string & body,
const std::function<bool()> should_stop,
int32_t timeout_read,
int32_t timeout_write
int32_t timeout_read = 600,
int32_t timeout_write = 600
);
~server_http_proxy() {
if (cleanup) {
Expand All @@ -201,3 +202,48 @@ struct server_http_proxy : server_http_res {
std::string content_type;
};
};

// BELOW IS DEMO CODE FOR PROXY HANDLERS
// DO NOT MERGE IT AS-IS

// Splits an optional ":port" suffix off `host`.
//
// - No colon, or more than one colon (e.g. a bare IPv6 literal): `host` and
//   `port` are left untouched and true is returned.
// - "name:" (empty port): the colon is removed and `port` keeps its current
//   (default) value.
// - "name:digits": `host` becomes "name" and `port` the parsed value.
//
// Returns false when the suffix is not a decimal number in [1, 65535] or when
// nothing precedes the colon.
static bool proxy_split_host_port(std::string & host, int & port) {
    const auto colon = host.find(':');
    if (colon == std::string::npos || colon != host.rfind(':')) {
        return true;
    }
    if (colon == 0) {
        return false;
    }

    const std::string digits = host.substr(colon + 1);
    if (digits.size() > 5) {
        return false; // cannot be <= 65535, and keeps the accumulator from overflowing
    }

    if (!digits.empty()) {
        int value = 0;
        for (const char c : digits) {
            if (c < '0' || c > '9') {
                return false;
            }
            value = value * 10 + (c - '0');
        }
        if (value < 1 || value > 65535) {
            return false;
        }
        port = value;
    }

    host.erase(colon);
    return true;
}

// Forwards `req` to the URL given in its "url" parameter using HTTP `method`
// and returns the proxied response object.
//
// Throws std::runtime_error when the target URL has no host, carries a
// password, uses a scheme other than http/https, has a malformed port, or
// combines scheme and port in a way the proxy client cannot serve.
//
// NOTE(review): the target is taken verbatim from the request, so this acts as
// an open proxy (SSRF towards internal hosts); restrict the allowed targets
// before exposing it.
// NOTE(review): only `password` is checked below; if common_http_url also
// exposes a user name, "user@host" URLs are presumably still accepted - confirm.
static server_http_res_ptr proxy_request(const server_http_req & req, std::string method) {
    const std::string target_url = req.get_param("url");
    common_http_url parsed_url = common_http_parse_url(target_url);

    if (parsed_url.host.empty()) {
        throw std::runtime_error("invalid target URL: missing host");
    }

    if (parsed_url.path.empty()) {
        parsed_url.path = "/";
    }

    if (!parsed_url.password.empty()) {
        throw std::runtime_error("authentication in target URL is not supported");
    }

    if (parsed_url.scheme != "http" && parsed_url.scheme != "https") {
        throw std::runtime_error("unsupported URL scheme in target URL: " + parsed_url.scheme);
    }

    const bool is_https = parsed_url.scheme == "https";

    // Start from the scheme's default port and honour an explicit ":port".
    // Assumes the parser leaves any ":port" inside `host` - TODO confirm; when
    // it does not, the split below is a no-op and the default port is used.
    int port = is_https ? 443 : 80;
    if (!proxy_split_host_port(parsed_url.host, port)) {
        throw std::runtime_error("invalid target URL: malformed port");
    }

    // The server_http_proxy constructor enables TLS only when port == 443, so
    // any other pairing would either send an https request in clear text or
    // speak TLS to a plain-http target. Refuse instead of silently downgrading.
    if (is_https != (port == 443)) {
        throw std::runtime_error("unsupported port for URL scheme in target URL: " + parsed_url.scheme);
    }

    SRV_INF("proxying %s request to %s://%s:%d%s\n", method.c_str(), parsed_url.scheme.c_str(), parsed_url.host.c_str(), port, parsed_url.path.c_str());

    auto proxy = std::make_unique<server_http_proxy>(
            method,
            parsed_url.host,
            port,
            parsed_url.path,
            req.headers,
            req.body,
            req.should_stop);

    return proxy;
}

// Handler that relays the incoming request through proxy_request() using the
// POST method.
static server_http_context::handler_t proxy_handler_post =
    [](const server_http_req & incoming) -> server_http_res_ptr {
        return proxy_request(incoming, std::string("POST"));
    };

// Handler that relays the incoming request through proxy_request() using the
// GET method.
static server_http_context::handler_t proxy_handler_get =
    [](const server_http_req & incoming) -> server_http_res_ptr {
        return proxy_request(incoming, std::string("GET"));
    };
3 changes: 3 additions & 0 deletions tools/server/server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,9 @@ int main(int argc, char ** argv) {
// Save & load slots
ctx_http.get ("/slots", ex_wrapper(routes.get_slots));
ctx_http.post("/slots/:id_slot", ex_wrapper(routes.post_slots));
// CORS proxy
ctx_http.get ("/cors-proxy", ex_wrapper(proxy_handler_get));
ctx_http.post("/cors-proxy", ex_wrapper(proxy_handler_post));

//
// Start the server
Expand Down
4 changes: 2 additions & 2 deletions vendor/cpp-httplib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ target_compile_definitions(${TARGET} PRIVATE

set(OPENSSL_NO_ASM ON CACHE BOOL "Disable OpenSSL ASM code when building BoringSSL or LibreSSL")

if (LLAMA_BUILD_BORINGSSL)
if (LLAMA_BORINGSSL)
set(FIPS OFF CACHE BOOL "Enable FIPS (BoringSSL)")

set(BORINGSSL_GIT "https://boringssl.googlesource.com/boringssl" CACHE STRING "BoringSSL git repository")
Expand Down Expand Up @@ -70,7 +70,7 @@ if (LLAMA_BUILD_BORINGSSL)
set(CPPHTTPLIB_OPENSSL_SUPPORT TRUE)
target_link_libraries(${TARGET} PUBLIC ssl crypto)

elseif (LLAMA_BUILD_LIBRESSL)
elseif (LLAMA_LIBRESSL)
set(LIBRESSL_VERSION "4.2.1" CACHE STRING "LibreSSL version")

message(STATUS "Fetching LibreSSL version ${LIBRESSL_VERSION}")
Expand Down
Loading