3 changes: 2 additions & 1 deletion README.md
@@ -13,7 +13,7 @@ Built in Go for performance and simplicity, llama-swap has zero dependencies and

- ✅ Easy to deploy and configure: one binary, one configuration file, no external dependencies
- ✅ On-demand model switching
- ✅ Use any local OpenAI compatible server (llama.cpp, vllm, tabbyAPI, stable-diffusion.cpp, etc.)
- ✅ Use any local OpenAI compatible server (llama.cpp, vllm, tabbyAPI, stable-diffusion.cpp, whisper.cpp, etc.)
- future-proof: upgrade your inference servers at any time.
- ✅ OpenAI API supported endpoints:
- `v1/completions`
@@ -69,6 +69,7 @@ llama-swap can be installed in multiple ways
### Docker Install ([download images](https://github.com/mostlygeek/llama-swap/pkgs/container/llama-swap))

Nightly container images with llama-swap and llama-server are built for multiple platforms (cuda, vulkan, intel, etc.) including [non-root variants with improved security](docs/container-security.md).
The whisper.cpp server is also included for the cuda, musa and vulkan platforms.
The stable-diffusion.cpp server is also included for the musa and vulkan platforms.

(shell code block truncated in diff)
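
For orientation (not part of the diff), here is a hedged sketch of pulling and running one of these images; the `:cuda` tag is inferred from the build script's ARCH values below, and the port mapping and models path are assumptions:

```shell
# Hedged sketch, not taken from the repository docs: run the cuda image,
# which now bundles whisper-server alongside llama-server. The tag is
# inferred from the build script's ARCH values; the host port, the assumed
# container listen port of 8080, and the /models mount path (matching
# config.example.yaml below) are assumptions.
docker pull ghcr.io/mostlygeek/llama-swap:cuda
docker run --rm -p 9292:8080 \
  -v /path/to/models:/models \
  ghcr.io/mostlygeek/llama-swap:cuda
```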
13 changes: 13 additions & 0 deletions docker/build-container.sh
@@ -45,6 +45,7 @@ fi
# variable, this permits testing with forked llama.cpp repositories
BASE_IMAGE=${BASE_LLAMACPP_IMAGE:-ghcr.io/ggml-org/llama.cpp}
SD_IMAGE=${BASE_SDCPP_IMAGE:-ghcr.io/leejet/stable-diffusion.cpp}
WH_IMAGE=${BASE_WHISPERCPP_IMAGE:-ghcr.io/ggml-org/whisper.cpp}

# Set llama-swap repository, automatically uses GITHUB_REPOSITORY variable
# to enable easy container builds on forked repos
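
Because these defaults use `${VAR:-default}` expansion, each upstream image can be overridden from the environment, as the comment about forked llama.cpp repositories describes. A hedged sketch for the new whisper.cpp variable (the script's argument handling is not shown in this diff, so the bare invocation is an assumption):

```shell
# Hedged sketch: point the build at a forked whisper.cpp image, mirroring
# the BASE_LLAMACPP_IMAGE override pattern. The invocation form is an
# assumption, since the script's argument handling is not shown here.
BASE_WHISPERCPP_IMAGE=ghcr.io/my-fork/whisper.cpp ./build-container.sh
```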
@@ -112,6 +113,7 @@ else
fi

SD_TAG=master-${ARCH}
WH_TAG=main-${ARCH}

# Abort if LCPP_TAG is empty.
if [[ -z "$LCPP_TAG" ]]; then
@@ -157,6 +159,17 @@ for CONTAINER_TYPE in non-root root; do
-t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} . ;;
esac

# For architectures with whisper.cpp support, layer whisper-server on top
case "$ARCH" in
"cuda" | "musa" | "vulkan")
log_info "Adding whisper-server to $CONTAINER_TAG"
docker build -f llama-swap-whisper.Containerfile \
--build-arg BASE=${CONTAINER_TAG} \
--build-arg WH_IMAGE=${WH_IMAGE} --build-arg WH_TAG=${WH_TAG} \
--build-arg UID=${USER_UID} --build-arg GID=${USER_GID} \
-t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} . ;;
esac

if [ "$PUSH_IMAGES" == "true" ]; then
docker push ${CONTAINER_TAG}
docker push ${CONTAINER_LATEST}
8 changes: 8 additions & 0 deletions docker/config.example.yaml
@@ -17,6 +17,14 @@ models:
-hf bartowski/SmolLM2-135M-Instruct-GGUF:Q4_K_M
--port 9999

whisper:
checkEndpoint: /v1/audio/transcriptions/
cmd: >
/app/whisper-server
--host 127.0.0.1 --port ${PORT}
-m /models/ggml-large-v3-turbo.bin
--request-path /v1/audio/transcriptions --inference-path ""

z-image:
checkEndpoint: /
cmd: |
(cmd block truncated in diff)
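
Once llama-swap loads the whisper entry above, it can be exercised through the OpenAI-compatible transcription endpoint; a hedged sketch, where the proxy port and the audio file are assumptions:

```shell
# Hedged sketch: request a transcription through llama-swap's proxy. The
# model name and endpoint match the whisper entry above; the proxy port
# and the sample file are assumptions.
curl http://localhost:8080/v1/audio/transcriptions \
  -F model=whisper \
  -F file=@sample.wav
```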
14 changes: 14 additions & 0 deletions docker/llama-swap-whisper.Containerfile
@@ -0,0 +1,14 @@
ARG WH_IMAGE=ghcr.io/ggml-org/whisper.cpp
ARG WH_TAG=main-cuda
ARG BASE=llama-swap:latest

FROM ${WH_IMAGE}:${WH_TAG} AS ws-source
FROM ${BASE}

ARG UID=10001
ARG GID=10001

COPY --from=ws-source --chown=${UID}:${GID} /app/build/bin/whisper-server /app/whisper-server
COPY --from=ws-source --chown=${UID}:${GID} /app/build/src/*.so* /app/
@rare-magma (Contributor, Author) commented on Feb 3, 2026:
@mostlygeek if the ggml library version diverges between llama-server and whisper-server, then whisper-server might fail because it was built against a different shared library. To mitigate this, the whisper-server binary would have to be compiled with statically linked libraries. sd-server doesn't have that problem because it is built with statically linked libs.

@mostlygeek (Owner) replied:
This creates some risk of whisper stopping to work in the llama-swap:cuda and musa containers. Ideally, the container build step would compile a statically linked whisper and copy it in. I attempted that before and found it was not trivial to do.


WORKDIR /app
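
A hedged way to check for the shared-library mismatch discussed in the thread above is to inspect whisper-server's dynamic dependencies inside the built image; the image tag and the presence of ldd in the image are assumptions:

```shell
# Hedged sketch: list whisper-server's shared-library dependencies inside
# the image; any "not found" entry would signal the ggml divergence the
# review thread describes. Image tag and ldd availability are assumptions.
docker run --rm --entrypoint ldd ghcr.io/mostlygeek/llama-swap:cuda /app/whisper-server
```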