From 6cc856e9c4c3652049c087f78ce7704b6e0188b4 Mon Sep 17 00:00:00 2001 From: Benson Wong Date: Sun, 22 Mar 2026 23:54:15 +0000 Subject: [PATCH 1/4] docker/unified: improve GHA caching --- docker/unified/build-image.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docker/unified/build-image.sh b/docker/unified/build-image.sh index 8ac42842..d1e00715 100755 --- a/docker/unified/build-image.sh +++ b/docker/unified/build-image.sh @@ -155,6 +155,12 @@ BUILD_ARGS=( if [[ "$NO_CACHE" == true ]]; then BUILD_ARGS+=(--no-cache) echo "Note: Building without cache" +elif [[ "${GITHUB_ACTIONS:-}" == "true" ]]; then + BUILD_ARGS+=( + --cache-from "type=gha" + --cache-to "type=gha,mode=max" + ) + echo "Note: Using GitHub Actions cache" fi DOCKER_BUILDKIT=1 docker buildx build --load "${BUILD_ARGS[@]}" "${SCRIPT_DIR}" From 2f5938711d2e2aba432f6819257317ad7142304b Mon Sep 17 00:00:00 2001 From: Benson Wong Date: Mon, 23 Mar 2026 02:21:16 +0000 Subject: [PATCH 2/4] .github,docker: switch to registry type cache The GHA cache has a limit of 10GB, which the docker layers and ccache already exceed. The registry type cache does not have that limit, so switching to it will help eliminate unnecessary recompiling work. 
--- .github/workflows/unified-docker.yml | 3 +-- docker/unified/build-image.sh | 7 ++++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/unified-docker.yml b/.github/workflows/unified-docker.yml index c75afa5c..022c4cdd 100644 --- a/.github/workflows/unified-docker.yml +++ b/.github/workflows/unified-docker.yml @@ -50,9 +50,8 @@ jobs: if: ${{ !env.ACT }} uses: docker/setup-buildx-action@v3 - # Disabled until ready to publish - name: Log in to GitHub Container Registry - if: false + if: ${{ !env.ACT }} uses: docker/login-action@v3 with: registry: ghcr.io diff --git a/docker/unified/build-image.sh b/docker/unified/build-image.sh index d1e00715..0ba42440 100755 --- a/docker/unified/build-image.sh +++ b/docker/unified/build-image.sh @@ -156,11 +156,12 @@ if [[ "$NO_CACHE" == true ]]; then BUILD_ARGS+=(--no-cache) echo "Note: Building without cache" elif [[ "${GITHUB_ACTIONS:-}" == "true" ]]; then + CACHE_REF="ghcr.io/mostlygeek/llama-swap:unified-cache" BUILD_ARGS+=( - --cache-from "type=gha" - --cache-to "type=gha,mode=max" + --cache-from "type=registry,ref=${CACHE_REF}" + --cache-to "type=registry,ref=${CACHE_REF},mode=max" ) - echo "Note: Using GitHub Actions cache" + echo "Note: Using registry cache (${CACHE_REF})" fi DOCKER_BUILDKIT=1 docker buildx build --load "${BUILD_ARGS[@]}" "${SCRIPT_DIR}" From 13f000eb3e334cac0871722e97a93112cd59a691 Mon Sep 17 00:00:00 2001 From: Benson Wong Date: Mon, 23 Mar 2026 09:36:53 +0000 Subject: [PATCH 3/4] docker/unified: add non-root user, clean up final runtime container - add llama-swap user - set ENTRYPOINT to llama-swap - remove unnecessary steps from runtime container --- docker/unified/Dockerfile | 21 ++++++++----------- docker/unified/build-image.sh | 4 ++-- docker/unified/config.example.yaml | 33 ++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 14 deletions(-) create mode 100644 docker/unified/config.example.yaml diff --git a/docker/unified/Dockerfile 
b/docker/unified/Dockerfile index 8a865a87..a6ba3937 100644 --- a/docker/unified/Dockerfile +++ b/docker/unified/Dockerfile @@ -74,6 +74,11 @@ COPY --from=builder-base /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/ RUN pip3 install --no-cache-dir numpy sentencepiece +# Create llama-swap user and config directory +RUN useradd --system --no-create-home --shell /sbin/nologin llama-swap && \ + mkdir -p /etc/llama-swap/config && \ + chown -R llama-swap:llama-swap /etc/llama-swap + WORKDIR /app # Copy whisper.cpp binaries and libraries @@ -97,17 +102,7 @@ COPY --from=llama-swap-download /install/llama-swap-version /tmp/ RUN ldconfig -# Convenience symlinks -RUN ln -sf /usr/local/bin/llama-cli /usr/local/bin/llama && \ - ln -sf /usr/local/bin/whisper-cli /usr/local/bin/whisper && \ - ln -sf /usr/local/bin/sd-cli /usr/local/bin/stable-diffusion - -# Validate all binaries exist -RUN set -e && \ - for bin in llama-server llama-cli whisper-server whisper-cli sd-server sd-cli llama-swap; do \ - test -x /usr/local/bin/$bin || { echo "FATAL: $bin missing from /usr/local/bin"; exit 1; }; \ - done && \ - echo "All binaries validated successfully" +COPY config.example.yaml /etc/llama-swap/config/config.yaml # Version tracking RUN echo "llama.cpp: ${LLAMA_COMMIT_HASH}" > /versions.txt && \ @@ -118,4 +113,6 @@ RUN echo "llama.cpp: ${LLAMA_COMMIT_HASH}" > /versions.txt && \ echo "build_timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> /versions.txt WORKDIR /models -CMD ["bash"] +USER llama-swap +ENTRYPOINT ["llama-swap"] +CMD ["-config", "/etc/llama-swap/config/config.yaml", "-listen", "0.0.0.0:8080"] diff --git a/docker/unified/build-image.sh b/docker/unified/build-image.sh index 0ba42440..534178dd 100755 --- a/docker/unified/build-image.sh +++ b/docker/unified/build-image.sh @@ -174,7 +174,7 @@ echo "" MISSING_BINARIES=() for binary in llama-server llama-cli whisper-server whisper-cli sd-server sd-cli llama-swap; do - if ! 
docker run --rm "${DOCKER_IMAGE_TAG}" which "${binary}" >/dev/null 2>&1; then + if ! docker run --rm --entrypoint which "${DOCKER_IMAGE_TAG}" "${binary}" >/dev/null 2>&1; then MISSING_BINARIES+=("${binary}") fi done @@ -203,7 +203,7 @@ echo "Built with:" echo " llama.cpp: ${LLAMA_HASH}" echo " whisper.cpp: ${WHISPER_HASH}" echo " stable-diffusion.cpp: ${SD_HASH}" -echo " llama-swap: $(docker run --rm "${DOCKER_IMAGE_TAG}" cat /versions.txt | grep llama-swap | cut -d' ' -f2-)" +echo " llama-swap: $(docker run --rm --entrypoint cat "${DOCKER_IMAGE_TAG}" /versions.txt | grep llama-swap | cut -d' ' -f2-)" echo "" echo "Run with:" echo " docker run -it --rm --gpus all ${DOCKER_IMAGE_TAG}" diff --git a/docker/unified/config.example.yaml b/docker/unified/config.example.yaml new file mode 100644 index 00000000..66630d90 --- /dev/null +++ b/docker/unified/config.example.yaml @@ -0,0 +1,33 @@ +# placeholder example configuration +healthCheckTimeout: 300 +logRequests: true + +models: + "llama": + cmd: > + llama-server + -hf bartowski/Qwen2.5-0.5B-Instruct-GGUF:Q4_K_M + --port ${PORT} + + "whisper": + checkEndpoint: /v1/audio/transcriptions/ + cmd: > + whisper-server + --port ${PORT} + --m /models/whisper.bin + --flash-attn + --request-path /v1/audio/transcriptions --inference-path "" + + "image": + checkEndpoint: / + cmd: | + /app/sd-server + --listen-port 9999 + --diffusion-fa + --diffusion-model /models/z_image_turbo-Q8_0.gguf + --vae /models/ae.safetensors + --llm /models/qwen3-4b-instruct-2507-q8_0.gguf + --offload-to-cpu + --cfg-scale 1.0 + --height 512 --width 512 + --steps 8 From c32008948014369a87db25072c069a2801108814 Mon Sep 17 00:00:00 2001 From: Benson Wong Date: Mon, 23 Mar 2026 09:58:23 +0000 Subject: [PATCH 4/4] docker/unified: resolve llama-swap ref and enable publishing - Resolve llama-swap git ref to full commit hash (same as other projects) - Update install-llama-swap.sh to map commit hash back to release tag - Enable daily cron schedule matching 
containers.yml (37 5 * * *) - Default all cpp refs to master, llama-swap to main - Enable GHCR push with dated tag (unified-YYYY-MM-DD) - Use registry cache only on real GHA runs (skip under act) Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/unified-docker.yml | 29 ++++++++++++++++------------ docker/unified/build-image.sh | 20 ++++++++++++++----- docker/unified/install-llama-swap.sh | 19 ++++++++++++++++-- 3 files changed, 49 insertions(+), 19 deletions(-) diff --git a/.github/workflows/unified-docker.yml b/.github/workflows/unified-docker.yml index 022c4cdd..31b45703 100644 --- a/.github/workflows/unified-docker.yml +++ b/.github/workflows/unified-docker.yml @@ -1,24 +1,27 @@ name: Build Unified Docker Image on: + schedule: + - cron: "37 5 * * *" + workflow_dispatch: inputs: llama_cpp_ref: description: "llama.cpp commit hash, tag, or branch" required: false - default: "b8468" + default: "master" whisper_ref: description: "whisper.cpp commit hash, tag, or branch" required: false - default: "v1.8.4" + default: "master" sd_ref: description: "stable-diffusion.cpp commit hash, tag, or branch" required: false - default: "545fac4" + default: "master" llama_swap_version: - description: "llama-swap version (e.g. v198, latest)" + description: "llama-swap version (e.g. v198, latest, main)" required: false - default: "v198" + default: "main" permissions: contents: read @@ -60,10 +63,10 @@ jobs: - name: Build unified Docker image env: - LLAMA_REF: ${{ inputs.llama_cpp_ref }} - WHISPER_REF: ${{ inputs.whisper_ref }} - SD_REF: ${{ inputs.sd_ref }} - LS_VERSION: ${{ inputs.llama_swap_version }} + LLAMA_REF: ${{ inputs.llama_cpp_ref || 'master' }} + WHISPER_REF: ${{ inputs.whisper_ref || 'master' }} + SD_REF: ${{ inputs.sd_ref || 'master' }} + LS_VERSION: ${{ inputs.llama_swap_version || 'main' }} DOCKER_IMAGE_TAG: ghcr.io/mostlygeek/llama-swap:unified # When running under act, use the local builder that has warm ccache. 
# On GitHub Actions, BUILDX_BUILDER is unset so docker uses the builder @@ -73,7 +76,9 @@ jobs: chmod +x docker/unified/build-image.sh docker/unified/build-image.sh - # Disabled until ready to publish - name: Push to GitHub Container Registry - if: false - run: docker push ghcr.io/mostlygeek/llama-swap:unified + if: ${{ !env.ACT }} + run: | + docker push ghcr.io/mostlygeek/llama-swap:unified + docker tag ghcr.io/mostlygeek/llama-swap:unified ghcr.io/mostlygeek/llama-swap:unified-$(date -u +%Y-%m-%d) + docker push ghcr.io/mostlygeek/llama-swap:unified-$(date -u +%Y-%m-%d) diff --git a/docker/unified/build-image.sh b/docker/unified/build-image.sh index 534178dd..408ecb7d 100755 --- a/docker/unified/build-image.sh +++ b/docker/unified/build-image.sh @@ -42,6 +42,7 @@ DOCKER_IMAGE_TAG="${DOCKER_IMAGE_TAG:-llama-swap:unified}" LLAMA_REPO="https://github.com/ggml-org/llama.cpp.git" WHISPER_REPO="https://github.com/ggml-org/whisper.cpp.git" SD_REPO="https://github.com/leejet/stable-diffusion.cpp.git" +LLAMA_SWAP_REPO="https://github.com/mostlygeek/llama-swap.git" # Resolve a git ref (commit hash, tag, or branch) to a full commit hash. # Requires only: git, network access to the remote. 
@@ -131,9 +132,18 @@ else echo "stable-diffusion.cpp: latest HEAD: ${SD_HASH}" fi -# Resolve llama-swap version -LS_VER="${LS_VERSION:-latest}" -echo "llama-swap: ${LS_VER}" +# Resolve llama-swap ref +if [[ -n "${LS_VERSION:-}" ]]; then + LS_HASH=$(resolve_ref "${LLAMA_SWAP_REPO}" "${LS_VERSION}") || exit 1 + echo "llama-swap: ${LS_VERSION} -> ${LS_HASH}" +else + LS_HASH=$(get_latest_hash "${LLAMA_SWAP_REPO}") + if [[ -z "${LS_HASH}" ]]; then + echo "ERROR: Could not determine latest commit for llama-swap" >&2 + exit 1 + fi + echo "llama-swap: latest HEAD: ${LS_HASH}" +fi echo "" echo "==========================================" @@ -147,7 +157,7 @@ BUILD_ARGS=( --build-arg "LLAMA_COMMIT_HASH=${LLAMA_HASH}" --build-arg "WHISPER_COMMIT_HASH=${WHISPER_HASH}" --build-arg "SD_COMMIT_HASH=${SD_HASH}" - --build-arg "LS_VERSION=${LS_VER}" + --build-arg "LS_VERSION=${LS_HASH}" -t "${DOCKER_IMAGE_TAG}" -f "${SCRIPT_DIR}/Dockerfile" ) @@ -155,7 +165,7 @@ BUILD_ARGS=( if [[ "$NO_CACHE" == true ]]; then BUILD_ARGS+=(--no-cache) echo "Note: Building without cache" -elif [[ "${GITHUB_ACTIONS:-}" == "true" ]]; then +elif [[ "${GITHUB_ACTIONS:-}" == "true" && "${ACT:-}" != "true" ]]; then CACHE_REF="ghcr.io/mostlygeek/llama-swap:unified-cache" BUILD_ARGS+=( --cache-from "type=registry,ref=${CACHE_REF}" diff --git a/docker/unified/install-llama-swap.sh b/docker/unified/install-llama-swap.sh index f98aaac8..ca6437ec 100755 --- a/docker/unified/install-llama-swap.sh +++ b/docker/unified/install-llama-swap.sh @@ -5,12 +5,27 @@ set -e VERSION="${1:-latest}" -# Strip leading 'v' prefix so both "198" and "v198" work -VERSION="${VERSION#v}" REPO="mostlygeek/llama-swap" mkdir -p /install/bin +# If a full commit hash is given, find the release tag that points to it +if echo "${VERSION}" | grep -qE '^[0-9a-f]{40}$'; then + echo "=== Resolving commit ${VERSION:0:7} to release tag ===" + TAG=$(git ls-remote --tags "https://github.com/${REPO}.git" 2>/dev/null \ + | grep "^${VERSION}" | sed 
's|.*refs/tags/||' | grep -v '\^{}' | head -1) + if [ -n "${TAG}" ]; then + echo "Resolved to tag: ${TAG}" + VERSION="${TAG#v}" + else + echo "No release tag found for commit ${VERSION:0:7}, using latest" + VERSION="latest" + fi +fi + +# Strip leading 'v' prefix so both "198" and "v198" work +VERSION="${VERSION#v}" + # Resolve "latest" to actual version number if [ "$VERSION" = "latest" ]; then echo "=== Resolving latest llama-swap release ==="