Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 18 additions & 14 deletions .github/workflows/unified-docker.yml
Original file line number Diff line number Diff line change
@@ -1,24 +1,27 @@
name: Build Unified Docker Image

on:
schedule:
- cron: "37 5 * * *"

workflow_dispatch:
inputs:
llama_cpp_ref:
description: "llama.cpp commit hash, tag, or branch"
required: false
default: "b8468"
default: "master"
whisper_ref:
description: "whisper.cpp commit hash, tag, or branch"
required: false
default: "v1.8.4"
default: "master"
sd_ref:
description: "stable-diffusion.cpp commit hash, tag, or branch"
required: false
default: "545fac4"
default: "master"
llama_swap_version:
description: "llama-swap version (e.g. v198, latest)"
description: "llama-swap version (e.g. v198, latest, main)"
required: false
default: "v198"
default: "main"

permissions:
contents: read
Expand Down Expand Up @@ -50,9 +53,8 @@ jobs:
if: ${{ !env.ACT }}
uses: docker/setup-buildx-action@v3

# Disabled until ready to publish
- name: Log in to GitHub Container Registry
if: false
if: ${{ !env.ACT }}
uses: docker/login-action@v3
with:
registry: ghcr.io
Expand All @@ -61,10 +63,10 @@ jobs:

- name: Build unified Docker image
env:
LLAMA_REF: ${{ inputs.llama_cpp_ref }}
WHISPER_REF: ${{ inputs.whisper_ref }}
SD_REF: ${{ inputs.sd_ref }}
LS_VERSION: ${{ inputs.llama_swap_version }}
LLAMA_REF: ${{ inputs.llama_cpp_ref || 'master' }}
WHISPER_REF: ${{ inputs.whisper_ref || 'master' }}
SD_REF: ${{ inputs.sd_ref || 'master' }}
LS_VERSION: ${{ inputs.llama_swap_version || 'main' }}
DOCKER_IMAGE_TAG: ghcr.io/mostlygeek/llama-swap:unified
# When running under act, use the local builder that has warm ccache.
# On GitHub Actions, BUILDX_BUILDER is unset so docker uses the builder
Expand All @@ -74,7 +76,9 @@ jobs:
chmod +x docker/unified/build-image.sh
docker/unified/build-image.sh

# Disabled until ready to publish
- name: Push to GitHub Container Registry
if: false
run: docker push ghcr.io/mostlygeek/llama-swap:unified
if: ${{ !env.ACT }}
run: |
  # Compute the date suffix once so the tag that is created and the tag that
  # is pushed are guaranteed to match, even if the step spans midnight UTC.
  DATE_TAG="$(date -u +%Y-%m-%d)"
  docker push ghcr.io/mostlygeek/llama-swap:unified
  docker tag ghcr.io/mostlygeek/llama-swap:unified "ghcr.io/mostlygeek/llama-swap:unified-${DATE_TAG}"
  docker push "ghcr.io/mostlygeek/llama-swap:unified-${DATE_TAG}"
21 changes: 9 additions & 12 deletions docker/unified/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,11 @@ COPY --from=builder-base /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/

RUN pip3 install --no-cache-dir numpy sentencepiece

# Create the `llama-swap` system account and the config directory it owns.
# The account is created without a home directory and with a nologin shell;
# /etc/llama-swap (including /etc/llama-swap/config) is chowned to it.
# NOTE(review): because no home directory exists, anything that writes under
# $HOME at runtime would need another writable location — TODO confirm this
# is acceptable for the commands the container is expected to launch.
RUN useradd --system --no-create-home --shell /sbin/nologin llama-swap && \
mkdir -p /etc/llama-swap/config && \
chown -R llama-swap:llama-swap /etc/llama-swap

WORKDIR /app

# Copy whisper.cpp binaries and libraries
Expand All @@ -97,17 +102,7 @@ COPY --from=llama-swap-download /install/llama-swap-version /tmp/

RUN ldconfig

# Convenience symlinks
RUN ln -sf /usr/local/bin/llama-cli /usr/local/bin/llama && \
ln -sf /usr/local/bin/whisper-cli /usr/local/bin/whisper && \
ln -sf /usr/local/bin/sd-cli /usr/local/bin/stable-diffusion

# Validate all binaries exist
RUN set -e && \
for bin in llama-server llama-cli whisper-server whisper-cli sd-server sd-cli llama-swap; do \
test -x /usr/local/bin/$bin || { echo "FATAL: $bin missing from /usr/local/bin"; exit 1; }; \
done && \
echo "All binaries validated successfully"
COPY config.example.yaml /etc/llama-swap/config/config.yaml

# Version tracking
RUN echo "llama.cpp: ${LLAMA_COMMIT_HASH}" > /versions.txt && \
Expand All @@ -118,4 +113,6 @@ RUN echo "llama.cpp: ${LLAMA_COMMIT_HASH}" > /versions.txt && \
echo "build_timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> /versions.txt

WORKDIR /models
CMD ["bash"]
USER llama-swap
ENTRYPOINT ["llama-swap"]
CMD ["-config", "/etc/llama-swap/config/config.yaml", "-listen", "0.0.0.0:8080"]
29 changes: 23 additions & 6 deletions docker/unified/build-image.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ DOCKER_IMAGE_TAG="${DOCKER_IMAGE_TAG:-llama-swap:unified}"
LLAMA_REPO="https://github.com/ggml-org/llama.cpp.git"
WHISPER_REPO="https://github.com/ggml-org/whisper.cpp.git"
SD_REPO="https://github.com/leejet/stable-diffusion.cpp.git"
LLAMA_SWAP_REPO="https://github.com/mostlygeek/llama-swap.git"

# Resolve a git ref (commit hash, tag, or branch) to a full commit hash.
# Requires only: git, network access to the remote.
Expand Down Expand Up @@ -131,9 +132,18 @@ else
echo "stable-diffusion.cpp: latest HEAD: ${SD_HASH}"
fi

# Resolve llama-swap version
LS_VER="${LS_VERSION:-latest}"
echo "llama-swap: ${LS_VER}"
# Resolve llama-swap ref.
#
# LS_VERSION may be:
#   - "latest": not a git ref, so it is handed to the Docker build unchanged
#     (the value ends up in the LS_VERSION build arg via LS_HASH).
#     NOTE(review): assumes the in-image installer still accepts "latest" —
#     confirm against install-llama-swap.sh.
#   - any commit hash, tag, or branch: resolved to a full commit hash.
#   - empty/unset: the remote's current HEAD commit is used.
if [[ "${LS_VERSION:-}" == "latest" ]]; then
    LS_HASH="latest"
    echo "llama-swap: latest release"
elif [[ -n "${LS_VERSION:-}" ]]; then
    # resolve_ref reports its own error; abort the build if it fails.
    LS_HASH=$(resolve_ref "${LLAMA_SWAP_REPO}" "${LS_VERSION}") || exit 1
    echo "llama-swap: ${LS_VERSION} -> ${LS_HASH}"
else
    LS_HASH=$(get_latest_hash "${LLAMA_SWAP_REPO}")
    if [[ -z "${LS_HASH}" ]]; then
        echo "ERROR: Could not determine latest commit for llama-swap" >&2
        exit 1
    fi
    echo "llama-swap: latest HEAD: ${LS_HASH}"
fi

echo ""
echo "=========================================="
Expand All @@ -147,14 +157,21 @@ BUILD_ARGS=(
--build-arg "LLAMA_COMMIT_HASH=${LLAMA_HASH}"
--build-arg "WHISPER_COMMIT_HASH=${WHISPER_HASH}"
--build-arg "SD_COMMIT_HASH=${SD_HASH}"
--build-arg "LS_VERSION=${LS_VER}"
--build-arg "LS_VERSION=${LS_HASH}"
-t "${DOCKER_IMAGE_TAG}"
-f "${SCRIPT_DIR}/Dockerfile"
)

# Choose the cache behaviour for the docker build by appending to BUILD_ARGS:
#   - NO_CACHE == true: pass --no-cache.
#   - GITHUB_ACTIONS is "true" and ACT is not "true": use a registry-backed
#     cache, reading from and writing to the same CACHE_REF image.
#   - otherwise: add nothing here.
if [[ "$NO_CACHE" == true ]]; then
BUILD_ARGS+=(--no-cache)
echo "Note: Building without cache"
# The ACT check excludes runs where ACT is "true", so the registry cache is
# only configured when GITHUB_ACTIONS is "true" and ACT is not.
elif [[ "${GITHUB_ACTIONS:-}" == "true" && "${ACT:-}" != "true" ]]; then
CACHE_REF="ghcr.io/mostlygeek/llama-swap:unified-cache"
BUILD_ARGS+=(
--cache-from "type=registry,ref=${CACHE_REF}"
--cache-to "type=registry,ref=${CACHE_REF},mode=max"
)
echo "Note: Using registry cache (${CACHE_REF})"
fi

DOCKER_BUILDKIT=1 docker buildx build --load "${BUILD_ARGS[@]}" "${SCRIPT_DIR}"
Expand All @@ -167,7 +184,7 @@ echo ""

MISSING_BINARIES=()
for binary in llama-server llama-cli whisper-server whisper-cli sd-server sd-cli llama-swap; do
if ! docker run --rm "${DOCKER_IMAGE_TAG}" which "${binary}" >/dev/null 2>&1; then
if ! docker run --rm --entrypoint which "${DOCKER_IMAGE_TAG}" "${binary}" >/dev/null 2>&1; then
MISSING_BINARIES+=("${binary}")
fi
done
Expand Down Expand Up @@ -196,7 +213,7 @@ echo "Built with:"
echo " llama.cpp: ${LLAMA_HASH}"
echo " whisper.cpp: ${WHISPER_HASH}"
echo " stable-diffusion.cpp: ${SD_HASH}"
echo " llama-swap: $(docker run --rm "${DOCKER_IMAGE_TAG}" cat /versions.txt | grep llama-swap | cut -d' ' -f2-)"
echo " llama-swap: $(docker run --rm --entrypoint cat "${DOCKER_IMAGE_TAG}" /versions.txt | grep llama-swap | cut -d' ' -f2-)"
echo ""
echo "Run with:"
echo " docker run -it --rm --gpus all ${DOCKER_IMAGE_TAG}"
33 changes: 33 additions & 0 deletions docker/unified/config.example.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# placeholder example configuration
#
# Every model command listens on ${PORT} so that each entry follows the same
# port convention instead of pinning one model to a fixed port.
# Binaries are referenced by bare name and resolved via PATH.
healthCheckTimeout: 300
logRequests: true

models:
  # Text generation via llama.cpp; the model is fetched with -hf.
  "llama":
    cmd: >
      llama-server
      -hf bartowski/Qwen2.5-0.5B-Instruct-GGUF:Q4_K_M
      --port ${PORT}

  # Speech-to-text via whisper.cpp; expects the model file under /models.
  "whisper":
    checkEndpoint: /v1/audio/transcriptions/
    cmd: >
      whisper-server
      --port ${PORT}
      --model /models/whisper.bin
      --flash-attn
      --request-path /v1/audio/transcriptions --inference-path ""

  # Image generation via stable-diffusion.cpp; expects model files under /models.
  "image":
    checkEndpoint: /
    cmd: |
      sd-server
      --listen-port ${PORT}
      --diffusion-fa
      --diffusion-model /models/z_image_turbo-Q8_0.gguf
      --vae /models/ae.safetensors
      --llm /models/qwen3-4b-instruct-2507-q8_0.gguf
      --offload-to-cpu
      --cfg-scale 1.0
      --height 512 --width 512
      --steps 8
19 changes: 17 additions & 2 deletions docker/unified/install-llama-swap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,27 @@
set -e

VERSION="${1:-latest}"
# Strip leading 'v' prefix so both "198" and "v198" work
VERSION="${VERSION#v}"
REPO="mostlygeek/llama-swap"

mkdir -p /install/bin

# If a full commit hash is given, find the release tag that points to it.
#
# `git ls-remote --tags` lists an annotated tag twice: once as refs/tags/<tag>
# with the hash of the tag object, and once as refs/tags/<tag>^{} with the hash
# of the commit it points to. The "^{}" suffix is therefore stripped rather
# than the line being discarded, so annotated and lightweight tags both match
# a commit hash. Falls back to "latest" when no tag points at the commit.
if echo "${VERSION}" | grep -qE '^[0-9a-f]{40}$'; then
    # Short hash for log messages only (portable alternative to ${VERSION:0:7}).
    SHORT_HASH=$(printf '%s' "${VERSION}" | cut -c1-7)
    echo "=== Resolving commit ${SHORT_HASH} to release tag ==="
    # The pipeline ends in `head`, so a lookup that finds nothing yields an
    # empty TAG instead of aborting the script under `set -e`.
    TAG=$(git ls-remote --tags "https://github.com/${REPO}.git" 2>/dev/null \
        | grep "^${VERSION}" | sed -e 's|.*refs/tags/||' -e 's|\^{}$||' | head -1)
    if [ -n "${TAG}" ]; then
        echo "Resolved to tag: ${TAG}"
        VERSION="${TAG#v}"
    else
        echo "No release tag found for commit ${SHORT_HASH}, using latest"
        VERSION="latest"
    fi
fi

# Strip leading 'v' prefix so both "198" and "v198" work
VERSION="${VERSION#v}"

# Resolve "latest" to actual version number
if [ "$VERSION" = "latest" ]; then
echo "=== Resolving latest llama-swap release ==="
Expand Down
Loading