diff --git a/Makefile b/Makefile
index f7d18586a..f0fb12d2b 100644
--- a/Makefile
+++ b/Makefile
@@ -51,7 +51,7 @@ mac: ui
 linux: ui
 	@echo "Building Linux binary..."
 	GOOS=linux GOARCH=amd64 go build -ldflags="-X main.commit=${GIT_HASH} -X main.version=local_${GIT_HASH} -X main.date=${BUILD_DATE}" -o $(BUILD_DIR)/$(APP_NAME)-linux-amd64
-	GOOS=linux GOARCH=arm64 go build -ldflags="-X main.commit=${GIT_HASH} -X main.version=local_${GIT_HASH} -X main.date=${BUILD_DATE}" -o $(BUILD_DIR)/$(APP_NAME)-linux-arm64
+#GOOS=linux GOARCH=arm64 go build -ldflags="-X main.commit=${GIT_HASH} -X main.version=local_${GIT_HASH} -X main.date=${BUILD_DATE}" -o $(BUILD_DIR)/$(APP_NAME)-linux-arm64
 
 # Build Windows binary
 windows: ui
diff --git a/docker/.gitignore b/docker/.gitignore
new file mode 100644
index 000000000..b7697f06b
--- /dev/null
+++ b/docker/.gitignore
@@ -0,0 +1,2 @@
+# BuildKit configuration file (generated by build-image.sh)
+buildkitd.toml
diff --git a/docker/AGENTS.md b/docker/AGENTS.md
new file mode 100644
index 000000000..b42c74abe
--- /dev/null
+++ b/docker/AGENTS.md
@@ -0,0 +1,24 @@
+- you are working in my VM sandbox. It is safe to use sudo.
+- use or install whatever tools you need to complete your goal
+- use `docker buildx build --build-arg BACKEND=cuda` or `--build-arg BACKEND=vulkan` with the unified `docker/Dockerfile`
+- DOCKER_BUILDKIT=1 is required for cache mounts and conditional FROM stages
+- ALWAYS send notifications to get the user's attention
+- when running `./build-image.sh`, use a 2-hour (7200000ms) timeout minimum as CUDA builds take 60-120+ minutes to compile for multiple architectures
+
+# Adding a new server project
+
+1. Add source clone stage in `docker/Dockerfile` (FROM builder-base AS `<name>`-source)
+2. Add build stage with CUDA/Vulkan conditional cmake flags (FROM builder-base AS `<name>`-build)
+3. Add COPY lines in the runtime stage for binaries and libraries
+4. Add the binary name(s) to the validation RUN step in the runtime stage
+5. Add the repo URL and commit hash to `docker/build-image.sh`
+
+# Notifications
+
+ALWAYS send notifications to keep the user informed:
+
+- When starting or finishing a job
+- For progress updates on long-running tasks (especially Docker builds)
+- For todo list progress updates (when items start/complete)
+- When you need feedback or to elicit information from the user
+- use pushover.sh, example: `pushover.sh "notification to send"`
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 000000000..9d6fe044c
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,194 @@
+# Unified multi-stage Dockerfile for GPU-accelerated AI inference tools
+# Includes: llama.cpp, whisper.cpp, stable-diffusion.cpp, llama-swap
+#
+# Usage:
+#   docker buildx build --build-arg BACKEND=cuda -t llama-swap:cuda .
+#   docker buildx build --build-arg BACKEND=vulkan -t llama-swap:vulkan .
+#
+# Adding a new server project:
+#   1. Add a case to install.sh with project-specific cmake flags and targets
+#   2. Add a source clone stage (FROM builder-base AS <name>-source)
+#   3. Add a build stage that runs: bash /build/install.sh "$BACKEND" <project>
+#   4. Add COPY lines in the runtime stage
+#   5. Add the binary name(s) to the validation RUN step
+#   6. Update build-image.sh with the new repo URL and commit hash
+
+ARG BACKEND=cuda
+
+# Builder base: CUDA devel image + Vulkan SDK (supports both backends)
+FROM nvidia/cuda:12.4.0-devel-ubuntu22.04 AS builder-base
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV CMAKE_CUDA_ARCHITECTURES="60;61;75;86;89"
+ENV CCACHE_DIR=/ccache
+ENV CCACHE_MAXSIZE=2G
+ENV PATH="/usr/lib/ccache:${PATH}"
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential cmake git python3 python3-pip libssl-dev \
+    curl ca-certificates ccache make wget xz-utils unzip \
+    && rm -rf /var/lib/apt/lists/*
+
+ARG VULKAN_SDK_VERSION=1.3.275.0
+RUN wget -q --show-progress \
+    https://sdk.lunarg.com/sdk/download/${VULKAN_SDK_VERSION}/linux/vulkansdk-linux-x86_64-${VULKAN_SDK_VERSION}.tar.xz \
+    -O /tmp/vulkan-sdk.tar.xz && \
+    mkdir -p /opt && \
+    tar -xf /tmp/vulkan-sdk.tar.xz -C /opt && \
+    rm /tmp/vulkan-sdk.tar.xz
+
+ENV VULKAN_SDK=/opt/${VULKAN_SDK_VERSION}/x86_64
+ENV PATH="${VULKAN_SDK}/bin:${PATH}"
+ENV CMAKE_PREFIX_PATH="${VULKAN_SDK}"
+ENV VULKAN_INCLUDE_DIRS="${VULKAN_SDK}/include"
+
+WORKDIR /src
+
+# Build llama-swap from local source
+FROM golang:1.25-alpine AS llama-swap-builder
+WORKDIR /app
+COPY go.mod go.sum ./
+RUN go mod download
+COPY . .
+ENV CGO_ENABLED=0 GOOS=linux GOARCH=amd64
+RUN go build -o llama-swap .
+RUN if git rev-parse --git-dir > /dev/null 2>&1; then \
+        VERSION=$(git rev-parse --short HEAD) && \
+        if [ -n "$(git status --porcelain)" ]; then \
+            VERSION="${VERSION}+dirty"; \
+        fi && \
+        echo "$VERSION" > /app/llama-swap-version; \
+    else \
+        echo "local-build" > /app/llama-swap-version; \
+    fi
+
+# Source cloning (cached independently per project)
+FROM builder-base AS llama-source
+ARG LLAMA_COMMIT_HASH=master
+RUN git clone --filter=blob:none --no-checkout https://github.com/ggml-org/llama.cpp.git /src/llama.cpp && \
+    cd /src/llama.cpp && \
+    git fetch --depth=1 origin ${LLAMA_COMMIT_HASH} && \
+    git checkout FETCH_HEAD
+
+FROM builder-base AS whisper-source
+ARG WHISPER_COMMIT_HASH=master
+RUN git clone --filter=blob:none --no-checkout https://github.com/ggml-org/whisper.cpp.git /src/whisper.cpp && \
+    cd /src/whisper.cpp && \
+    git fetch --depth=1 origin ${WHISPER_COMMIT_HASH} && \
+    git checkout FETCH_HEAD
+
+FROM builder-base AS sd-source
+ARG SD_COMMIT_HASH=master
+RUN git clone --filter=blob:none --no-checkout https://github.com/leejet/stable-diffusion.cpp.git /src/stable-diffusion.cpp && \
+    cd /src/stable-diffusion.cpp && \
+    git fetch --depth=1 origin ${SD_COMMIT_HASH} && \
+    git checkout FETCH_HEAD && \
+    git submodule update --init --recursive --depth=1
+
+# Project builds (ordered by build time: fastest first)
+FROM builder-base AS whisper-build
+ARG BACKEND=cuda
+COPY --from=whisper-source /src/whisper.cpp /build/whisper.cpp
+COPY docker/install.sh /build/
+WORKDIR /build/whisper.cpp
+RUN --mount=type=cache,id=ccache-${BACKEND},target=/ccache \
+    --mount=type=cache,id=whisper-${BACKEND},target=/build/whisper.cpp/build \
+    bash /build/install.sh "$BACKEND" whisper
+
+FROM builder-base AS sd-build
+ARG BACKEND=cuda
+ARG SD_COMMIT_HASH=master
+COPY --from=sd-source /src/stable-diffusion.cpp /build/stable-diffusion.cpp
+COPY docker/install.sh /build/
+WORKDIR /build/stable-diffusion.cpp
+RUN --mount=type=cache,id=ccache-${BACKEND},target=/ccache \
+    --mount=type=cache,id=sd-${BACKEND},target=/build/stable-diffusion.cpp/build \
+    RELEASE_TAG="${SD_COMMIT_HASH}" bash /build/install.sh "$BACKEND" sd
+
+FROM builder-base AS llama-build
+ARG BACKEND=cuda
+ARG LLAMA_COMMIT_HASH=master
+COPY --from=llama-source /src/llama.cpp /build/llama.cpp
+COPY docker/install.sh /build/
+WORKDIR /build/llama.cpp
+RUN --mount=type=cache,id=ccache-${BACKEND},target=/ccache \
+    --mount=type=cache,id=llama-${BACKEND},target=/build/llama.cpp/build \
+    RELEASE_TAG="${LLAMA_COMMIT_HASH}" bash /build/install.sh "$BACKEND" llama
+
+# CUDA runtime
+FROM nvidia/cuda:12.4.0-runtime-ubuntu22.04 AS runtime-cuda
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    libgomp1 python3 python3-pip curl ca-certificates git \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY --from=builder-base /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so
+COPY --from=builder-base /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1
+
+# Vulkan runtime
+FROM ubuntu:22.04 AS runtime-vulkan
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV VK_DRIVER_FILES=/usr/share/vulkan/icd.d/lvp_icd.x86_64.json:/usr/share/vulkan/icd.d/radeon_icd.x86_64.json:/usr/share/vulkan/icd.d/intel_icd.x86_64.json
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    libgomp1 libvulkan1 mesa-vulkan-drivers vulkan-tools \
+    python3 python3-pip curl ca-certificates git \
+    && rm -rf /var/lib/apt/lists/*
+
+# Final runtime
+FROM runtime-${BACKEND} AS runtime
+
+ARG BACKEND=cuda
+ARG LLAMA_COMMIT_HASH=unknown
+ARG WHISPER_COMMIT_HASH=unknown
+ARG SD_COMMIT_HASH=unknown
+
+ENV PATH="/usr/local/bin:${PATH}"
+
+COPY --from=llama-swap-builder /app/llama-swap /usr/local/bin/
+COPY --from=llama-swap-builder /app/llama-swap-version /tmp/
+
+COPY docker/test-binaries.sh /usr/local/bin/
+RUN chmod +x /usr/local/bin/test-binaries.sh
+
+RUN pip3 install --no-cache-dir numpy sentencepiece
+
+WORKDIR /app
+
+COPY --from=llama-build /install/bin/llama-server /usr/local/bin/
+COPY --from=llama-build /install/bin/llama-cli /usr/local/bin/
+COPY --from=llama-build /install/lib/ /usr/local/lib/
+
+COPY --from=whisper-build /install/bin/whisper-server /usr/local/bin/
+COPY --from=whisper-build /install/bin/whisper-cli /usr/local/bin/
+COPY --from=whisper-build /install/lib/ /usr/local/lib/
+
+COPY --from=sd-build /install/bin/sd-server /usr/local/bin/
+COPY --from=sd-build /install/bin/sd-cli /usr/local/bin/
+COPY --from=sd-build /install/lib/ /usr/local/lib/
+
+RUN ldconfig
+
+RUN ln -sf /usr/local/bin/llama-cli /usr/local/bin/llama && \
+    ln -sf /usr/local/bin/whisper-cli /usr/local/bin/whisper && \
+    ln -sf /usr/local/bin/sd-cli /usr/local/bin/stable-diffusion
+
+RUN set -e && \
+    for bin in llama-server llama-cli whisper-server whisper-cli sd-server sd-cli llama-swap; do \
+        test -x /usr/local/bin/$bin || { echo "FATAL: $bin missing from /usr/local/bin"; exit 1; }; \
+    done && \
+    echo "All binaries validated successfully"
+
+RUN echo "llama.cpp: ${LLAMA_COMMIT_HASH}" > /versions.txt && \
+    echo "whisper.cpp: ${WHISPER_COMMIT_HASH}" >> /versions.txt && \
+    echo "stable-diffusion.cpp: ${SD_COMMIT_HASH}" >> /versions.txt && \
+    echo "llama-swap: $(cat /tmp/llama-swap-version)" >> /versions.txt && \
+    echo "backend: ${BACKEND}" >> /versions.txt && \
+    echo "build_timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> /versions.txt
+
+WORKDIR /models
+CMD ["bash"]
diff --git a/docker/build-image.sh b/docker/build-image.sh
new file mode 100755
index 000000000..7695b6042
--- /dev/null
+++ b/docker/build-image.sh
@@ -0,0 +1,305 @@
+#!/bin/bash
+#
+# Build script for llama-swap-docker with commit hash pinning
+#
+# Usage:
+#   ./build-image.sh --cuda                              # Build CUDA image
+#   ./build-image.sh --vulkan                            # Build Vulkan image
+#   ./build-image.sh --cuda --no-cache                   # Build CUDA image without cache
+#   LLAMA_COMMIT_HASH=abc123 ./build-image.sh --cuda     # Override llama.cpp commit
+#   LLAMA_COMMIT_HASH=b8429 ./build-image.sh --vulkan    # Override llama.cpp release tag (vulkan uses prebuilt binaries)
+#   WHISPER_COMMIT_HASH=def456 ./build-image.sh --vulkan # Override whisper.cpp commit
+#   SD_COMMIT_HASH=ghi789 ./build-image.sh --cuda        # Override stable-diffusion.cpp commit
+#
+# Features:
+#   - Auto-detects latest commit hashes from git repos
+#   - Builds llama-swap from local source code
+#   - Allows environment variable overrides for reproducible builds
+#   - Cache-friendly: changing commit hash busts cache appropriately
+#   - Supports both CUDA and Vulkan backends (requires explicit flag)
+#
+
+set -euo pipefail
+
+# Parse command line arguments
+BACKEND=""
+NO_CACHE=false
+
+if [[ $# -eq 0 ]]; then
+  echo "Error: No backend specified. Please use --cuda or --vulkan."
+  echo ""
+  echo "Usage: ./build-image.sh --cuda|--vulkan [--no-cache]"
+  echo ""
+  echo "Options:"
+  echo "  --cuda       Build CUDA image (NVIDIA GPUs)"
+  echo "  --vulkan     Build Vulkan image (AMD GPUs and compatible hardware)"
+  echo "  --no-cache   Force rebuild without using Docker cache"
+  echo "  --help, -h   Show this help message"
+  echo ""
+  echo "Environment variables:"
+  echo "  DOCKER_IMAGE_TAG     Set custom image tag (default: llama-swap:cuda or llama-swap:vulkan)"
+  echo "  LLAMA_COMMIT_HASH    Override llama.cpp commit hash"
+  echo "  WHISPER_COMMIT_HASH  Override whisper.cpp commit hash"
+  echo "  SD_COMMIT_HASH       Override stable-diffusion.cpp commit hash"
+  exit 1
+fi
+
+for arg in "$@"; do
+  case $arg in
+    --cuda)
+      BACKEND="cuda"
+      ;;
+    --vulkan)
+      BACKEND="vulkan"
+      ;;
+    --no-cache)
+      NO_CACHE=true
+      ;;
+    --help|-h)
+      echo "Usage: ./build-image.sh --cuda|--vulkan [--no-cache]"
+      echo ""
+      echo "Options:"
+      echo "  --cuda       Build CUDA image (NVIDIA GPUs)"
+      echo "  --vulkan     Build Vulkan image (AMD GPUs and compatible hardware)"
+      echo "  --no-cache   Force rebuild without using Docker cache"
+      echo "  --help, -h   Show this help message"
+      echo ""
+      echo "Environment variables:"
+      echo "  DOCKER_IMAGE_TAG     Set custom image tag (default: llama-swap:cuda or llama-swap:vulkan)"
+      echo "  LLAMA_COMMIT_HASH    Override llama.cpp commit hash"
+      echo "  WHISPER_COMMIT_HASH  Override whisper.cpp commit hash"
+      echo "  SD_COMMIT_HASH       Override stable-diffusion.cpp commit hash"
+      exit 0
+      ;;
+  esac
+done
+
+# Validate backend selection
+if [[ -z "$BACKEND" ]]; then
+  echo "Error: No backend specified. Please use --cuda or --vulkan."
+  exit 1
+fi
+
+# Configuration
+if [[ -n "${DOCKER_IMAGE_TAG:-}" ]]; then
+  # User provided a custom tag, use it as-is
+  :
+elif [[ "$BACKEND" == "vulkan" ]]; then
+  DOCKER_IMAGE_TAG="llama-swap:vulkan"
+else
+  DOCKER_IMAGE_TAG="llama-swap:cuda"
+fi
+DOCKER_BUILDKIT="${DOCKER_BUILDKIT:-1}"
+
+# Single unified Dockerfile, backend selected via build arg
+DOCKERFILE="Dockerfile"
+if [[ "$BACKEND" == "vulkan" ]]; then
+  echo "Building for: Vulkan (AMD GPUs and compatible hardware)"
+else
+  echo "Building for: CUDA (NVIDIA GPUs)"
+fi
+
+# Git repository URLs
+LLAMA_REPO="https://github.com/ggml-org/llama.cpp.git"
+WHISPER_REPO="https://github.com/ggml-org/whisper.cpp.git"
+SD_REPO="https://github.com/leejet/stable-diffusion.cpp.git"
+
+# Function to get the latest commit hash from a git repo's default branch
+get_latest_commit() {
+  local repo_url="$1"
+  local branch="${2:-master}"
+
+  # Try to get the latest commit hash for the specified branch
+  git ls-remote --heads "${repo_url}" "${branch}" 2>/dev/null | head -1 | cut -f1
+}
+
+# Function to get the default branch name (master or main)
+get_default_branch() {
+  local repo_url="$1"
+
+  # Check for master first
+  if git ls-remote --heads "${repo_url}" master &>/dev/null; then
+    echo "master"
+  elif git ls-remote --heads "${repo_url}" main &>/dev/null; then
+    echo "main"
+  else
+    echo "master" # fallback
+  fi
+}
+
+# Function to get the latest release tag from a GitHub repo
+get_latest_release_tag() {
+  local owner_repo="$1"
+  curl -fsSL "https://api.github.com/repos/${owner_repo}/releases/latest" \
+    | grep '"tag_name"' | head -1 | cut -d'"' -f4
+}
+
+echo "=========================================="
+echo "llama-swap-docker Build Script"
+echo "=========================================="
+echo ""
+
+# Determine commit hashes / release tags - use env vars or auto-detect
+# For vulkan builds, llama and sd use GitHub release tags (prebuilt binaries).
+# For cuda builds (or whisper on any backend), use git commit hashes.
+if [[ -n "${LLAMA_COMMIT_HASH:-}" ]]; then
+  LLAMA_HASH="${LLAMA_COMMIT_HASH}"
+  echo "llama.cpp: Using provided version: ${LLAMA_HASH}"
+elif [[ "$BACKEND" == "vulkan" ]]; then
+  LLAMA_HASH=$(get_latest_release_tag "ggml-org/llama.cpp")
+  if [[ -z "${LLAMA_HASH}" ]]; then
+    echo "ERROR: Could not determine latest release tag for llama.cpp" >&2
+    exit 1
+  fi
+  echo "llama.cpp: Auto-detected latest release tag: ${LLAMA_HASH}"
+else
+  LLAMA_BRANCH=$(get_default_branch "${LLAMA_REPO}")
+  LLAMA_HASH=$(get_latest_commit "${LLAMA_REPO}" "${LLAMA_BRANCH}")
+  if [[ -z "${LLAMA_HASH}" ]]; then
+    echo "ERROR: Could not determine latest commit for llama.cpp" >&2
+    exit 1
+  fi
+  echo "llama.cpp: Auto-detected latest commit (${LLAMA_BRANCH}): ${LLAMA_HASH}"
+fi
+
+if [[ -n "${WHISPER_COMMIT_HASH:-}" ]]; then
+  WHISPER_HASH="${WHISPER_COMMIT_HASH}"
+  echo "whisper.cpp: Using provided commit hash: ${WHISPER_HASH}"
+else
+  WHISPER_BRANCH=$(get_default_branch "${WHISPER_REPO}")
+  WHISPER_HASH=$(get_latest_commit "${WHISPER_REPO}" "${WHISPER_BRANCH}")
+  if [[ -z "${WHISPER_HASH}" ]]; then
+    echo "ERROR: Could not determine latest commit for whisper.cpp" >&2
+    exit 1
+  fi
+  echo "whisper.cpp: Auto-detected latest commit (${WHISPER_BRANCH}): ${WHISPER_HASH}"
+fi
+
+if [[ -n "${SD_COMMIT_HASH:-}" ]]; then
+  SD_HASH="${SD_COMMIT_HASH}"
+  echo "stable-diffusion.cpp: Using provided version: ${SD_HASH}"
+elif [[ "$BACKEND" == "vulkan" ]]; then
+  SD_HASH=$(get_latest_release_tag "leejet/stable-diffusion.cpp")
+  if [[ -z "${SD_HASH}" ]]; then
+    echo "ERROR: Could not determine latest release tag for stable-diffusion.cpp" >&2
+    exit 1
+  fi
+  echo "stable-diffusion.cpp: Auto-detected latest release tag: ${SD_HASH}"
+else
+  SD_BRANCH=$(get_default_branch "${SD_REPO}")
+  SD_HASH=$(get_latest_commit "${SD_REPO}" "${SD_BRANCH}")
+  if [[ -z "${SD_HASH}" ]]; then
+    echo "ERROR: Could not determine latest commit for stable-diffusion.cpp" >&2
+    exit 1
+  fi
+  echo "stable-diffusion.cpp: Auto-detected latest commit (${SD_BRANCH}): ${SD_HASH}"
+fi
+
+echo ""
+echo "=========================================="
+echo "Starting Docker build..."
+echo "=========================================="
+echo ""
+
+# Build the Docker image with commit hashes as build args
+# Build context is the repository root (..) so the Dockerfile can access Go source
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
+BUILD_ARGS=(
+  --build-arg "BACKEND=${BACKEND}"
+  --build-arg "LLAMA_COMMIT_HASH=${LLAMA_HASH}"
+  --build-arg "WHISPER_COMMIT_HASH=${WHISPER_HASH}"
+  --build-arg "SD_COMMIT_HASH=${SD_HASH}"
+  -t "${DOCKER_IMAGE_TAG}"
+  -f "${SCRIPT_DIR}/${DOCKERFILE}"
+)
+
+if [[ "$NO_CACHE" == true ]]; then
+  BUILD_ARGS+=(--no-cache)
+  echo "Note: Building without cache"
+fi
+
+# Use docker buildx with a custom builder for parallelism control
+# The legacy DOCKER_BUILDKIT=1 docker build doesn't respect BUILDKIT_MAX_PARALLELISM env var
+# We need to use a custom builder with a buildkitd.toml config file
+BUILDER_NAME="llama-swap-builder"
+
+# Check if our custom builder exists with the right config, create/update if needed
+if ! docker buildx inspect "$BUILDER_NAME" >/dev/null 2>&1; then
+  echo "Creating custom buildx builder with max-parallelism=1..."
+
+  # Create buildkitd.toml config file
+  cat > buildkitd.toml << 'BUILDKIT_EOF'
+[worker.oci]
+  max-parallelism = 1
+BUILDKIT_EOF
+
+  # Create the builder with the config
+  docker buildx create --name "$BUILDER_NAME" \
+    --driver docker-container \
+    --buildkitd-config buildkitd.toml \
+    --use
+else
+  # Switch to our builder
+  docker buildx use "$BUILDER_NAME"
+fi
+
+echo "Building with sequential stages (one at a time), each using all CPU cores..."
+echo "Using builder: $BUILDER_NAME"
+
+# Use docker buildx build with --load to load the image into Docker
+# The --builder flag ensures we use our custom builder with max-parallelism=1
+# Build context is the repository root so we can access Go source files
+docker buildx build --builder "$BUILDER_NAME" --load "${BUILD_ARGS[@]}" "${REPO_ROOT}"
+
+echo ""
+echo "=========================================="
+echo "Verifying build artifacts..."
+echo "=========================================="
+echo ""
+
+# Verify all expected binaries exist in the image
+MISSING_BINARIES=()
+
+for binary in llama-server llama-cli whisper-server whisper-cli sd-server sd-cli llama-swap; do
+  if ! docker run --rm "${DOCKER_IMAGE_TAG}" which "${binary}" >/dev/null 2>&1; then
+    MISSING_BINARIES+=("${binary}")
+  fi
+done
+
+if [[ ${#MISSING_BINARIES[@]} -gt 0 ]]; then
+  echo "ERROR: Build succeeded but the following binaries are missing from the image:"
+  for binary in "${MISSING_BINARIES[@]}"; do
+    echo "  - ${binary}"
+  done
+  echo ""
+  echo "This usually indicates a build stage failure. Try running with --no-cache flag:"
+  echo "  ./build-image.sh --vulkan --no-cache"
+  exit 1
+fi
+
+echo "All expected binaries verified: llama-server, llama-cli, whisper-server, whisper-cli, sd-server, sd-cli, llama-swap"
+
+echo ""
+echo "=========================================="
+echo "Build complete!"
+echo "=========================================="
+echo ""
+echo "Image tag: ${DOCKER_IMAGE_TAG}"
+echo ""
+echo "Built with:"
+echo "  llama.cpp: ${LLAMA_HASH}"
+echo "  whisper.cpp: ${WHISPER_HASH}"
+echo "  stable-diffusion.cpp: ${SD_HASH}"
+echo "  llama-swap: $(docker run --rm "${DOCKER_IMAGE_TAG}" cat /versions.txt | grep llama-swap | cut -d' ' -f2-)"
+echo ""
+if [[ "$BACKEND" == "vulkan" ]]; then
+  echo "Run with:"
+  echo "  docker run -it --rm --device /dev/dri:/dev/dri ${DOCKER_IMAGE_TAG}"
+  echo ""
+  echo "Note: For AMD GPUs, you may also need to mount render devices:"
+  echo "  docker run -it --rm --device /dev/dri:/dev/dri --group-add video ${DOCKER_IMAGE_TAG}"
+else
+  echo "Run with:"
+  echo "  docker run -it --rm --gpus all ${DOCKER_IMAGE_TAG}"
+fi
diff --git a/docker/install.sh b/docker/install.sh
new file mode 100644
index 000000000..e4b6aaf02
--- /dev/null
+++ b/docker/install.sh
@@ -0,0 +1,156 @@
+#!/bin/bash
+# Usage: ./install.sh <backend> <project>
+#
+# For vulkan builds of llama and sd, downloads prebuilt binaries from GitHub
+# releases instead of building from source. Requires RELEASE_TAG env var.
+# whisper.cpp has no prebuilt vulkan binaries, so it always builds from source.
+set -e
+
+BACKEND="$1"
+PROJECT="$2"
+
+if [ -z "$BACKEND" ] || [ -z "$PROJECT" ]; then
+  echo "Usage: $0 <backend> <project>" >&2
+  exit 1
+fi
+
+mkdir -p /install/bin /install/lib
+
+# ---------------------------------------------------------------------------
+# Vulkan prebuilt binary download for llama and sd
+# ---------------------------------------------------------------------------
+if [ "$BACKEND" = "vulkan" ] && [ "$PROJECT" != "whisper" ]; then
+  if [ -z "${RELEASE_TAG:-}" ]; then
+    echo "ERROR: RELEASE_TAG env var required for vulkan prebuilt download of $PROJECT" >&2
+    exit 1
+  fi
+
+  TMPDIR=$(mktemp -d)
+  trap 'rm -rf "$TMPDIR"' EXIT
+
+  case "$PROJECT" in
+    llama)
+      # tag: b8429  asset: llama-b8429-bin-ubuntu-vulkan-x64.tar.gz
+      ASSET="llama-${RELEASE_TAG}-bin-ubuntu-vulkan-x64.tar.gz"
+      URL="https://github.com/ggml-org/llama.cpp/releases/download/${RELEASE_TAG}/${ASSET}"
+      echo "=== Downloading prebuilt llama.cpp vulkan binaries ==="
+      echo "URL: $URL"
+      curl -fSL -o "${TMPDIR}/release.tar.gz" "$URL"
+      tar xzf "${TMPDIR}/release.tar.gz" -C "${TMPDIR}"
+
+      find "${TMPDIR}" -name "llama-server" -type f -exec cp {} /install/bin/ \;
+      find "${TMPDIR}" -name "llama-cli" -type f -exec cp {} /install/bin/ \;
+      find "${TMPDIR}" -name "*.so*" -type f -exec cp {} /install/lib/ \;
+      EXPECTED_BINS="llama-server llama-cli"
+      ;;
+    sd)
+      # tag: master-536-5265a5e  asset: sd-master-5265a5e-bin-Linux-...-vulkan.zip
+      # The asset name drops the build number from the tag.
+      SD_BRANCH=$(echo "$RELEASE_TAG" | cut -d'-' -f1)
+      SD_HASH=$(echo "$RELEASE_TAG" | rev | cut -d'-' -f1 | rev)
+      ASSET="sd-${SD_BRANCH}-${SD_HASH}-bin-Linux-Ubuntu-24.04-x86_64-vulkan.zip"
+      URL="https://github.com/leejet/stable-diffusion.cpp/releases/download/${RELEASE_TAG}/${ASSET}"
+      echo "=== Downloading prebuilt sd.cpp vulkan binaries ==="
+      echo "URL: $URL"
+      curl -fSL -o "${TMPDIR}/release.zip" "$URL"
+      unzip -q "${TMPDIR}/release.zip" -d "${TMPDIR}"
+
+      # sd.cpp release names the CLI binary "sd", rename to sd-cli
+      if find "${TMPDIR}" -name "sd" -not -name "sd-*" -type f | grep -q .; then
+        find "${TMPDIR}" -name "sd" -not -name "sd-*" -type f -exec cp {} /install/bin/sd-cli \;
+      else
+        find "${TMPDIR}" -name "sd-cli" -type f -exec cp {} /install/bin/ \;
+      fi
+      find "${TMPDIR}" -name "sd-server" -type f -exec cp {} /install/bin/ \;
+      find "${TMPDIR}" -name "*.so*" -type f -exec cp {} /install/lib/ \;
+      EXPECTED_BINS="sd-cli sd-server"
+      ;;
+  esac
+
+  # Verify expected binaries were extracted
+  for bin in $EXPECTED_BINS; do
+    if [ ! -f "/install/bin/$bin" ]; then
+      echo "ERROR: $bin not found in downloaded release" >&2
+      echo "Archive contents:" >&2
+      find "${TMPDIR}" -type f >&2
+      exit 1
+    fi
+  done
+
+  chmod +x /install/bin/*
+  echo "=== $PROJECT prebuilt vulkan binaries installed ==="
+  ls -la /install/bin/
+  exit 0
+fi
+
+# ---------------------------------------------------------------------------
+# Build from source (cuda, or vulkan whisper)
+# ---------------------------------------------------------------------------
+COMMON_FLAGS="-DGGML_NATIVE=OFF -DCMAKE_BUILD_TYPE=Release"
+
+case "$BACKEND" in
+  cuda)
+    COMMON_FLAGS="$COMMON_FLAGS
+      -DGGML_CUDA=ON
+      -DGGML_VULKAN=OFF
+      -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES:-60;61;75;86;89}
+      -DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler
+      -DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda
+      -DCMAKE_SHARED_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda
+      -DCMAKE_C_COMPILER_LAUNCHER=ccache
+      -DCMAKE_CXX_COMPILER_LAUNCHER=ccache"
+    ;;
+  vulkan)
+    COMMON_FLAGS="$COMMON_FLAGS
+      -DGGML_VULKAN=ON
+      -DVulkan_INCLUDE_DIR=${VULKAN_SDK}/include
+      -DVulkan_LIBRARY=${VULKAN_SDK}/lib/libvulkan.so"
+    ;;
+  *)
+    echo "Unknown backend: $BACKEND" >&2
+    exit 1
+    ;;
+esac
+
+case "$PROJECT" in
+  llama)
+    PROJECT_FLAGS="-DLLAMA_BUILD_TESTS=OFF"
+    [ "$BACKEND" = "vulkan" ] && PROJECT_FLAGS="$PROJECT_FLAGS -DGGML_BACKEND_DL=ON"
+    TARGETS="llama-cli llama-server"
+    ;;
+  whisper)
+    PROJECT_FLAGS=""
+    TARGETS="whisper-cli whisper-server"
+    ;;
+  sd)
+    PROJECT_FLAGS="-DSD_BUILD_EXAMPLES=OFF"
+    [ "$BACKEND" = "cuda" ] && PROJECT_FLAGS="$PROJECT_FLAGS -DSD_CUDA=ON"
+    [ "$BACKEND" = "vulkan" ] && PROJECT_FLAGS="$PROJECT_FLAGS -DSD_VULKAN=ON"
+    TARGETS="sd-cli sd-server"
+    ;;
+  *)
+    echo "Unknown project: $PROJECT" >&2
+    exit 1
+    ;;
+esac
+
+rm -rf build/CMakeCache.txt build/CMakeFiles 2>/dev/null || true
+
+echo "=== Building $PROJECT for $BACKEND ==="
+
+# shellcheck disable=SC2086
+cmake -B build $COMMON_FLAGS $PROJECT_FLAGS
+# shellcheck disable=SC2086
+cmake --build build --config Release -j"$(nproc)" --target $TARGETS
+
+for bin in $TARGETS; do
+  if [ ! -f "build/bin/$bin" ]; then
+    echo "FATAL: $bin not found in build/bin/" >&2
+    exit 1
+  fi
+  cp "build/bin/$bin" "/install/bin/"
+done
+find build -name "*.so*" -type f -exec cp {} /install/lib/ \;
+
+echo "=== $PROJECT build complete ==="
+ls -la /install/bin/
diff --git a/docker/test-binaries.sh b/docker/test-binaries.sh
new file mode 100755
index 000000000..1cced0a8b
--- /dev/null
+++ b/docker/test-binaries.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+# Test script for verifying GPU-accelerated binaries work correctly
+# Supports both CUDA and Vulkan backends, auto-detecting the environment
+
+set -e
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+print_info() {
+  echo -e "${GREEN}[INFO]${NC} $1"
+}
+
+print_warn() {
+  echo -e "${YELLOW}[WARN]${NC} $1"
+}
+
+print_error() {
+  echo -e "${RED}[ERROR]${NC} $1"
+}
+
+# Detect if real NVIDIA drivers are available
+detect_cuda_drivers() {
+  local real_driver_paths=(
+    "/lib/x86_64-linux-gnu/libcuda.so.1"
+    "/usr/lib/x86_64-linux-gnu/libcuda.so.1"
+    "/usr/local/cuda/lib64/libcuda.so.1"
+  )
+
+  for path in "${real_driver_paths[@]}"; do
+    if [ -f "$path" ]; then
+      print_info "Real NVIDIA drivers found at: $path"
+      return 0
+    fi
+  done
+
+  return 1
+}
+
+# Detect Vulkan ICD availability
+detect_vulkan() {
+  if [ -d "/usr/share/vulkan/icd.d" ] && ls /usr/share/vulkan/icd.d/*.json >/dev/null 2>&1; then
+    print_info "Vulkan ICDs found:"
+    ls /usr/share/vulkan/icd.d/*.json 2>/dev/null | while read -r f; do echo "  $f"; done
+    return 0
+  fi
+  return 1
+}
+
+# Main execution
+print_info "Starting binary tests..."
+
+# Set up GPU library environment
+if detect_cuda_drivers; then
+  print_info "Using real NVIDIA drivers"
+  export LD_LIBRARY_PATH="/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+elif [ -d "/usr/local/cuda/lib64/stubs" ]; then
+  print_warn "No real NVIDIA drivers detected"
+  print_warn "Falling back to stub drivers for testing"
+  print_warn "GPU functionality will NOT be available"
+  export LD_LIBRARY_PATH="/usr/local/cuda/lib64/stubs:/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+  print_info "LD_LIBRARY_PATH set to: $LD_LIBRARY_PATH"
+elif detect_vulkan; then
+  print_info "Vulkan backend detected"
+else
+  print_warn "No GPU drivers detected (CPU-only mode)"
+fi
+
+# Test all expected server binaries
+BINARIES=(llama-server whisper-server sd-server)
+FAILED=0
+
+for binary in "${BINARIES[@]}"; do
+  print_info "Testing ${binary}..."
+  if command -v "$binary" &> /dev/null; then
+    if "$binary" --help > /dev/null 2>&1 || "$binary" -h > /dev/null 2>&1; then
+      print_info "  $binary: OK"
+    else
+      print_error "  $binary: Failed to run"
+      FAILED=1
+    fi
+  else
+    print_error "  $binary: Not found in PATH"
+    FAILED=1
+  fi
+done
+
+if [ "$FAILED" -ne 0 ]; then
+  print_error "Some binary tests failed!"
+  exit 1
+fi
+
+print_info "All binary tests passed!"
+
+# Additional environment info
+print_info "Environment information:"
+echo "  LD_LIBRARY_PATH: ${LD_LIBRARY_PATH:-not set}"
+echo "  CUDA_VISIBLE_DEVICES: ${CUDA_VISIBLE_DEVICES:-not set}"
+
+if command -v nvidia-smi &> /dev/null; then
+  print_info "nvidia-smi output:"
+  nvidia-smi --query-gpu=name,driver_version,memory.total --format=csv,noheader 2>/dev/null || \
+    print_warn "nvidia-smi found but could not query GPU information"
+elif command -v vulkaninfo &> /dev/null; then
+  print_info "Vulkan device info:"
+  vulkaninfo --summary 2>/dev/null | head -20 || \
+    print_warn "vulkaninfo found but could not query device information"
+else
+  print_warn "No GPU query tools available (expected on CPU-only hosts)"
+fi
+
+exit 0