diff --git a/README.md b/README.md index 2fb40998..085fa792 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Built in Go for performance and simplicity, llama-swap has zero dependencies and - ✅ Easy to deploy and configure: one binary, one configuration file. no external dependencies - ✅ On-demand model switching -- ✅ Use any local OpenAI compatible server (llama.cpp, vllm, tabbyAPI, stable-diffusion.cpp, etc.) +- ✅ Use any local OpenAI compatible server (llama.cpp, vllm, tabbyAPI, stable-diffusion.cpp, whisper.cpp, etc.) - future proof, upgrade your inference servers at any time. - ✅ OpenAI API supported endpoints: - `v1/completions` @@ -69,6 +69,7 @@ llama-swap can be installed in multiple ways ### Docker Install ([download images](https://github.com/mostlygeek/llama-swap/pkgs/container/llama-swap)) Nightly container images with llama-swap and llama-server are built for multiple platforms (cuda, vulkan, intel, etc.) including [non-root variants with improved security](docs/container-security.md). +The whisper.cpp server is also included for the cuda, musa and vulkan platforms. The stable-diffusion.cpp server is also included for the musa and vulkan platforms. ```shell diff --git a/docker/build-container.sh b/docker/build-container.sh index 19b96b3e..6f546f5b 100755 --- a/docker/build-container.sh +++ b/docker/build-container.sh @@ -45,6 +45,7 @@ fi # variable, this permits testing with forked llama.cpp repositories BASE_IMAGE=${BASE_LLAMACPP_IMAGE:-ghcr.io/ggml-org/llama.cpp} SD_IMAGE=${BASE_SDCPP_IMAGE:-ghcr.io/leejet/stable-diffusion.cpp} +WH_IMAGE=${BASE_WHISPERCPP_IMAGE:-ghcr.io/ggml-org/whisper.cpp} # Set llama-swap repository, automatically uses GITHUB_REPOSITORY variable # to enable easy container builds on forked repos @@ -112,6 +113,7 @@ else fi SD_TAG=master-${ARCH} +WH_TAG=main-${ARCH} # Abort if LCPP_TAG is empty. 
if [[ -z "$LCPP_TAG" ]]; then @@ -157,6 +159,17 @@ for CONTAINER_TYPE in non-root root; do -t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} . ;; esac + # For architectures with whisper.cpp support, layer whisper-server on top + case "$ARCH" in + "cuda" | "musa" | "vulkan") + log_info "Adding whisper-server to $CONTAINER_TAG" + docker build -f llama-swap-whisper.Containerfile \ + --build-arg BASE=${CONTAINER_TAG} \ + --build-arg WH_IMAGE=${WH_IMAGE} --build-arg WH_TAG=${WH_TAG} \ + --build-arg UID=${USER_UID} --build-arg GID=${USER_GID} \ + -t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} . ;; + esac + if [ "$PUSH_IMAGES" == "true" ]; then docker push ${CONTAINER_TAG} docker push ${CONTAINER_LATEST} diff --git a/docker/config.example.yaml b/docker/config.example.yaml index ee8bce58..f101452a 100644 --- a/docker/config.example.yaml +++ b/docker/config.example.yaml @@ -17,6 +17,14 @@ models: -hf bartowski/SmolLM2-135M-Instruct-GGUF:Q4_K_M --port 9999 + whisper: + checkEndpoint: /v1/audio/transcriptions/ + cmd: > + /app/whisper-server + --host 127.0.0.1 --port ${PORT} + -m /models/ggml-large-v3-turbo.bin + --request-path /v1/audio/transcriptions --inference-path "" + z-image: checkEndpoint: / cmd: | diff --git a/docker/llama-swap-whisper.Containerfile b/docker/llama-swap-whisper.Containerfile new file mode 100644 index 00000000..0a4cd02c --- /dev/null +++ b/docker/llama-swap-whisper.Containerfile @@ -0,0 +1,14 @@ +ARG WH_IMAGE=ghcr.io/ggml-org/whisper.cpp +ARG WH_TAG=main-cuda +ARG BASE=llama-swap:latest + +FROM ${WH_IMAGE}:${WH_TAG} AS ws-source +FROM ${BASE} + +ARG UID=10001 +ARG GID=10001 + +COPY --from=ws-source --chown=${UID}:${GID} /app/build/bin/whisper-server /app/whisper-server +COPY --from=ws-source --chown=${UID}:${GID} /app/build/src/*.so* /app/ + +WORKDIR /app \ No newline at end of file