From 6cc856e9c4c3652049c087f78ce7704b6e0188b4 Mon Sep 17 00:00:00 2001
From: Benson Wong <mostlygeek+git@gmail.com>
Date: Sun, 22 Mar 2026 23:54:15 +0000
Subject: [PATCH 1/4] docker/unified: improve GHA caching

---
 docker/unified/build-image.sh | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/docker/unified/build-image.sh b/docker/unified/build-image.sh
index 8ac42842..d1e00715 100755
--- a/docker/unified/build-image.sh
+++ b/docker/unified/build-image.sh
@@ -155,6 +155,12 @@ BUILD_ARGS=(
 if [[ "$NO_CACHE" == true ]]; then
     BUILD_ARGS+=(--no-cache)
     echo "Note: Building without cache"
+elif [[ "${GITHUB_ACTIONS:-}" == "true" ]]; then
+    BUILD_ARGS+=(
+        --cache-from "type=gha"
+        --cache-to "type=gha,mode=max"
+    )
+    echo "Note: Using GitHub Actions cache"
 fi
 
 DOCKER_BUILDKIT=1 docker buildx build --load "${BUILD_ARGS[@]}" "${SCRIPT_DIR}"

From 2f5938711d2e2aba432f6819257317ad7142304b Mon Sep 17 00:00:00 2001
From: Benson Wong <mostlygeek+git@gmail.com>
Date: Mon, 23 Mar 2026 02:21:16 +0000
Subject: [PATCH 2/4] .github,docker: switch to registry type cache

The GHA cache has a limit of 10GB, which the docker layers and ccache
already exceed. The registry type cache does not have that limit, so
switching to it will help eliminate unnecessary recompiling work.
---
 .github/workflows/unified-docker.yml | 3 +--
 docker/unified/build-image.sh        | 7 ++++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/unified-docker.yml b/.github/workflows/unified-docker.yml
index c75afa5c..022c4cdd 100644
--- a/.github/workflows/unified-docker.yml
+++ b/.github/workflows/unified-docker.yml
@@ -50,9 +50,8 @@ jobs:
         if: ${{ !env.ACT }}
         uses: docker/setup-buildx-action@v3
 
-      # Disabled until ready to publish
       - name: Log in to GitHub Container Registry
-        if: false
+        if: ${{ !env.ACT }}
         uses: docker/login-action@v3
         with:
           registry: ghcr.io
diff --git a/docker/unified/build-image.sh b/docker/unified/build-image.sh
index d1e00715..0ba42440 100755
--- a/docker/unified/build-image.sh
+++ b/docker/unified/build-image.sh
@@ -156,11 +156,12 @@ if [[ "$NO_CACHE" == true ]]; then
     BUILD_ARGS+=(--no-cache)
     echo "Note: Building without cache"
 elif [[ "${GITHUB_ACTIONS:-}" == "true" ]]; then
+    CACHE_REF="ghcr.io/mostlygeek/llama-swap:unified-cache"
     BUILD_ARGS+=(
-        --cache-from "type=gha"
-        --cache-to "type=gha,mode=max"
+        --cache-from "type=registry,ref=${CACHE_REF}"
+        --cache-to "type=registry,ref=${CACHE_REF},mode=max"
     )
-    echo "Note: Using GitHub Actions cache"
+    echo "Note: Using registry cache (${CACHE_REF})"
 fi
 
 DOCKER_BUILDKIT=1 docker buildx build --load "${BUILD_ARGS[@]}" "${SCRIPT_DIR}"

From 13f000eb3e334cac0871722e97a93112cd59a691 Mon Sep 17 00:00:00 2001
From: Benson Wong <mostlygeek+git@gmail.com>
Date: Mon, 23 Mar 2026 09:36:53 +0000
Subject: [PATCH 3/4] docker/unified: add non-root user, clean up final runtime
 container

- add llama-swap user
- set ENTRYPOINT to llama-swap
- remove unnecessary steps from runtime container
---
 docker/unified/Dockerfile          | 21 ++++++++-----------
 docker/unified/build-image.sh      |  4 ++--
 docker/unified/config.example.yaml | 33 ++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 14 deletions(-)
 create mode 100644 docker/unified/config.example.yaml

diff --git a/docker/unified/Dockerfile b/docker/unified/Dockerfile
index 8a865a87..a6ba3937 100644
--- a/docker/unified/Dockerfile
+++ b/docker/unified/Dockerfile
@@ -74,6 +74,11 @@ COPY --from=builder-base /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/
 
 RUN pip3 install --no-cache-dir numpy sentencepiece
 
+# Create llama-swap user and config directory
+RUN useradd --system --no-create-home --shell /sbin/nologin llama-swap && \
+    mkdir -p /etc/llama-swap/config && \
+    chown -R llama-swap:llama-swap /etc/llama-swap
+
 WORKDIR /app
 
 # Copy whisper.cpp binaries and libraries
@@ -97,17 +102,7 @@ COPY --from=llama-swap-download /install/llama-swap-version /tmp/
 
 RUN ldconfig
 
-# Convenience symlinks
-RUN ln -sf /usr/local/bin/llama-cli /usr/local/bin/llama && \
-    ln -sf /usr/local/bin/whisper-cli /usr/local/bin/whisper && \
-    ln -sf /usr/local/bin/sd-cli /usr/local/bin/stable-diffusion
-
-# Validate all binaries exist
-RUN set -e && \
-    for bin in llama-server llama-cli whisper-server whisper-cli sd-server sd-cli llama-swap; do \
-        test -x /usr/local/bin/$bin || { echo "FATAL: $bin missing from /usr/local/bin"; exit 1; }; \
-    done && \
-    echo "All binaries validated successfully"
+COPY config.example.yaml /etc/llama-swap/config/config.yaml
 
 # Version tracking
 RUN echo "llama.cpp: ${LLAMA_COMMIT_HASH}" > /versions.txt && \
@@ -118,4 +113,6 @@ RUN echo "llama.cpp: ${LLAMA_COMMIT_HASH}" > /versions.txt && \
     echo "build_timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> /versions.txt
 
 WORKDIR /models
-CMD ["bash"]
+USER llama-swap
+ENTRYPOINT ["llama-swap"]
+CMD ["-config", "/etc/llama-swap/config/config.yaml", "-listen", "0.0.0.0:8080"]
diff --git a/docker/unified/build-image.sh b/docker/unified/build-image.sh
index 0ba42440..534178dd 100755
--- a/docker/unified/build-image.sh
+++ b/docker/unified/build-image.sh
@@ -174,7 +174,7 @@ echo ""
 
 MISSING_BINARIES=()
 for binary in llama-server llama-cli whisper-server whisper-cli sd-server sd-cli llama-swap; do
-    if ! docker run --rm "${DOCKER_IMAGE_TAG}" which "${binary}" >/dev/null 2>&1; then
+    if ! docker run --rm --entrypoint which "${DOCKER_IMAGE_TAG}" "${binary}" >/dev/null 2>&1; then
         MISSING_BINARIES+=("${binary}")
     fi
 done
@@ -203,7 +203,7 @@ echo "Built with:"
 echo "  llama.cpp:           ${LLAMA_HASH}"
 echo "  whisper.cpp:         ${WHISPER_HASH}"
 echo "  stable-diffusion.cpp: ${SD_HASH}"
-echo "  llama-swap:          $(docker run --rm "${DOCKER_IMAGE_TAG}" cat /versions.txt | grep llama-swap | cut -d' ' -f2-)"
+echo "  llama-swap:          $(docker run --rm --entrypoint cat "${DOCKER_IMAGE_TAG}" /versions.txt | grep llama-swap | cut -d' ' -f2-)"
 echo ""
 echo "Run with:"
 echo "  docker run -it --rm --gpus all ${DOCKER_IMAGE_TAG}"
diff --git a/docker/unified/config.example.yaml b/docker/unified/config.example.yaml
new file mode 100644
index 00000000..66630d90
--- /dev/null
+++ b/docker/unified/config.example.yaml
@@ -0,0 +1,33 @@
+# Placeholder example configuration; each server must listen on ${PORT}, the port llama-swap proxies to.
+healthCheckTimeout: 300
+logRequests: true
+
+models:
+  "llama":
+    cmd: >
+      llama-server
+      -hf bartowski/Qwen2.5-0.5B-Instruct-GGUF:Q4_K_M
+      --port ${PORT}
+
+  "whisper":
+    checkEndpoint: /v1/audio/transcriptions/
+    cmd: >
+      whisper-server
+      --port ${PORT}
+      -m /models/whisper.bin
+      --flash-attn
+      --request-path /v1/audio/transcriptions --inference-path ""
+
+  "image":
+    checkEndpoint: /
+    cmd: |
+      sd-server
+      --listen-port ${PORT}
+      --diffusion-fa
+      --diffusion-model /models/z_image_turbo-Q8_0.gguf
+      --vae /models/ae.safetensors
+      --llm /models/qwen3-4b-instruct-2507-q8_0.gguf
+      --offload-to-cpu
+      --cfg-scale 1.0
+      --height 512 --width 512
+      --steps 8

From c32008948014369a87db25072c069a2801108814 Mon Sep 17 00:00:00 2001
From: Benson Wong <mostlygeek+git@gmail.com>
Date: Mon, 23 Mar 2026 09:58:23 +0000
Subject: [PATCH 4/4] docker/unified: resolve llama-swap ref and enable
 publishing

- Resolve llama-swap git ref to full commit hash (same as other projects)
- Update install-llama-swap.sh to map commit hash back to release tag
- Enable daily cron schedule matching containers.yml (37 5 * * *)
- Default all cpp refs to master, llama-swap to main
- Enable GHCR push with dated tag (unified-YYYY-MM-DD)
- Use registry cache only on real GHA runs (skip under act)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .github/workflows/unified-docker.yml | 29 ++++++++++++++++------------
 docker/unified/build-image.sh        | 20 ++++++++++++++-----
 docker/unified/install-llama-swap.sh | 19 ++++++++++++++++--
 3 files changed, 49 insertions(+), 19 deletions(-)

diff --git a/.github/workflows/unified-docker.yml b/.github/workflows/unified-docker.yml
index 022c4cdd..31b45703 100644
--- a/.github/workflows/unified-docker.yml
+++ b/.github/workflows/unified-docker.yml
@@ -1,24 +1,27 @@
 name: Build Unified Docker Image
 
 on:
+  schedule:
+    - cron: "37 5 * * *"
+
   workflow_dispatch:
     inputs:
       llama_cpp_ref:
         description: "llama.cpp commit hash, tag, or branch"
         required: false
-        default: "b8468"
+        default: "master"
       whisper_ref:
         description: "whisper.cpp commit hash, tag, or branch"
         required: false
-        default: "v1.8.4"
+        default: "master"
       sd_ref:
         description: "stable-diffusion.cpp commit hash, tag, or branch"
         required: false
-        default: "545fac4"
+        default: "master"
       llama_swap_version:
-        description: "llama-swap version (e.g. v198, latest)"
+        description: "llama-swap version (e.g. v198, latest, main)"
         required: false
-        default: "v198"
+        default: "main"
 
 permissions:
   contents: read
@@ -60,10 +63,10 @@ jobs:
 
       - name: Build unified Docker image
         env:
-          LLAMA_REF: ${{ inputs.llama_cpp_ref }}
-          WHISPER_REF: ${{ inputs.whisper_ref }}
-          SD_REF: ${{ inputs.sd_ref }}
-          LS_VERSION: ${{ inputs.llama_swap_version }}
+          LLAMA_REF: ${{ inputs.llama_cpp_ref || 'master' }}
+          WHISPER_REF: ${{ inputs.whisper_ref || 'master' }}
+          SD_REF: ${{ inputs.sd_ref || 'master' }}
+          LS_VERSION: ${{ inputs.llama_swap_version || 'main' }}
           DOCKER_IMAGE_TAG: ghcr.io/mostlygeek/llama-swap:unified
           # When running under act, use the local builder that has warm ccache.
           # On GitHub Actions, BUILDX_BUILDER is unset so docker uses the builder
@@ -73,7 +76,9 @@ jobs:
           chmod +x docker/unified/build-image.sh
           docker/unified/build-image.sh
 
-      # Disabled until ready to publish
       - name: Push to GitHub Container Registry
-        if: false
-        run: docker push ghcr.io/mostlygeek/llama-swap:unified
+        if: ${{ !env.ACT }}
+        run: |
+          docker push ghcr.io/mostlygeek/llama-swap:unified
+          DATED_TAG="ghcr.io/mostlygeek/llama-swap:unified-$(date -u +%Y-%m-%d)"  # computed once so the tag created and the tag pushed always agree
+          docker tag ghcr.io/mostlygeek/llama-swap:unified "${DATED_TAG}" && docker push "${DATED_TAG}"
diff --git a/docker/unified/build-image.sh b/docker/unified/build-image.sh
index 534178dd..408ecb7d 100755
--- a/docker/unified/build-image.sh
+++ b/docker/unified/build-image.sh
@@ -42,6 +42,7 @@ DOCKER_IMAGE_TAG="${DOCKER_IMAGE_TAG:-llama-swap:unified}"
 LLAMA_REPO="https://github.com/ggml-org/llama.cpp.git"
 WHISPER_REPO="https://github.com/ggml-org/whisper.cpp.git"
 SD_REPO="https://github.com/leejet/stable-diffusion.cpp.git"
+LLAMA_SWAP_REPO="https://github.com/mostlygeek/llama-swap.git"
 
 # Resolve a git ref (commit hash, tag, or branch) to a full commit hash.
 # Requires only: git, network access to the remote.
@@ -131,9 +132,18 @@ else
     echo "stable-diffusion.cpp: latest HEAD: ${SD_HASH}"
 fi
 
-# Resolve llama-swap version
-LS_VER="${LS_VERSION:-latest}"
-echo "llama-swap: ${LS_VER}"
+# Resolve llama-swap ref; "latest" is a release alias, not a git ref, so it takes the HEAD path like an unset LS_VERSION
+if [[ -n "${LS_VERSION:-}" && "${LS_VERSION:-}" != "latest" ]]; then
+    LS_HASH=$(resolve_ref "${LLAMA_SWAP_REPO}" "${LS_VERSION}") || exit 1
+    echo "llama-swap: ${LS_VERSION} -> ${LS_HASH}"
+else
+    LS_HASH=$(get_latest_hash "${LLAMA_SWAP_REPO}")
+    if [[ -z "${LS_HASH}" ]]; then
+        echo "ERROR: Could not determine latest commit for llama-swap" >&2
+        exit 1
+    fi
+    echo "llama-swap: latest HEAD: ${LS_HASH}"
+fi
 
 echo ""
 echo "=========================================="
@@ -147,7 +157,7 @@ BUILD_ARGS=(
     --build-arg "LLAMA_COMMIT_HASH=${LLAMA_HASH}"
     --build-arg "WHISPER_COMMIT_HASH=${WHISPER_HASH}"
     --build-arg "SD_COMMIT_HASH=${SD_HASH}"
-    --build-arg "LS_VERSION=${LS_VER}"
+    --build-arg "LS_VERSION=${LS_HASH}"
     -t "${DOCKER_IMAGE_TAG}"
     -f "${SCRIPT_DIR}/Dockerfile"
 )
@@ -155,7 +165,7 @@ BUILD_ARGS=(
 if [[ "$NO_CACHE" == true ]]; then
     BUILD_ARGS+=(--no-cache)
     echo "Note: Building without cache"
-elif [[ "${GITHUB_ACTIONS:-}" == "true" ]]; then
+elif [[ "${GITHUB_ACTIONS:-}" == "true" && "${ACT:-}" != "true" ]]; then
     CACHE_REF="ghcr.io/mostlygeek/llama-swap:unified-cache"
     BUILD_ARGS+=(
         --cache-from "type=registry,ref=${CACHE_REF}"
diff --git a/docker/unified/install-llama-swap.sh b/docker/unified/install-llama-swap.sh
index f98aaac8..ca6437ec 100755
--- a/docker/unified/install-llama-swap.sh
+++ b/docker/unified/install-llama-swap.sh
@@ -5,12 +5,27 @@
 set -e
 
 VERSION="${1:-latest}"
-# Strip leading 'v' prefix so both "198" and "v198" work
-VERSION="${VERSION#v}"
 REPO="mostlygeek/llama-swap"
 
 mkdir -p /install/bin
 
+# A full commit hash is mapped to the release tag pointing at it; annotated tags list the commit only on their peeled "^{}" line, so that suffix is stripped rather than filtered out
+if echo "${VERSION}" | grep -qE '^[0-9a-f]{40}$'; then
+    echo "=== Resolving commit ${VERSION:0:7} to release tag ==="
+    TAG=$(git ls-remote --tags "https://github.com/${REPO}.git" 2>/dev/null \
+        | grep "^${VERSION}" | sed -e 's|.*refs/tags/||' -e 's|\^{}$||' | head -1)
+    if [ -n "${TAG}" ]; then
+        echo "Resolved to tag: ${TAG}"
+        VERSION="${TAG#v}"
+    else
+        echo "No release tag found for commit ${VERSION:0:7}, using latest"
+        VERSION="latest"
+    fi
+fi
+
+# Strip leading 'v' prefix so both "198" and "v198" work
+VERSION="${VERSION#v}"
+
 # Resolve "latest" to actual version number
 if [ "$VERSION" = "latest" ]; then
     echo "=== Resolving latest llama-swap release ==="