diff --git a/.github/workflows/unified-docker.yml b/.github/workflows/unified-docker.yml new file mode 100644 index 00000000..c75afa5c --- /dev/null +++ b/.github/workflows/unified-docker.yml @@ -0,0 +1,80 @@ +name: Build Unified Docker Image + +on: + workflow_dispatch: + inputs: + llama_cpp_ref: + description: "llama.cpp commit hash, tag, or branch" + required: false + default: "b8468" + whisper_ref: + description: "whisper.cpp commit hash, tag, or branch" + required: false + default: "v1.8.4" + sd_ref: + description: "stable-diffusion.cpp commit hash, tag, or branch" + required: false + default: "545fac4" + llama_swap_version: + description: "llama-swap version (e.g. v198, latest)" + required: false + default: "v198" + +permissions: + contents: read + packages: write + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Free up disk space + run: | + echo "Before cleanup:" + df -h + sudo rm -rf /usr/share/dotnet + sudo rm -rf /usr/local/lib/android + sudo rm -rf /opt/ghc + sudo rm -rf /opt/hostedtoolcache/CodeQL + sudo docker system prune -af + echo "After cleanup:" + df -h + + # On GitHub Actions runners, create a fresh builder. + # When running locally under act, skip this and reuse the existing + # llama-swap-builder (which has ccache warm) to avoid exhausting disk. 
+ - name: Set up Docker Buildx + if: ${{ !env.ACT }} + uses: docker/setup-buildx-action@v3 + + # Disabled until ready to publish + - name: Log in to GitHub Container Registry + if: false + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build unified Docker image + env: + LLAMA_REF: ${{ inputs.llama_cpp_ref }} + WHISPER_REF: ${{ inputs.whisper_ref }} + SD_REF: ${{ inputs.sd_ref }} + LS_VERSION: ${{ inputs.llama_swap_version }} + DOCKER_IMAGE_TAG: ghcr.io/mostlygeek/llama-swap:unified + # When running under act, use the local builder that has warm ccache. + # On GitHub Actions, BUILDX_BUILDER is unset so docker uses the builder + # created by setup-buildx-action above. + BUILDX_BUILDER: ${{ env.ACT == 'true' && 'llama-swap-builder' || '' }} + run: | + chmod +x docker/unified/build-image.sh + docker/unified/build-image.sh + + # Disabled until ready to publish + - name: Push to GitHub Container Registry + if: false + run: docker push ghcr.io/mostlygeek/llama-swap:unified diff --git a/Makefile b/Makefile index f7d18586..f0fb12d2 100644 --- a/Makefile +++ b/Makefile @@ -51,7 +51,7 @@ mac: ui linux: ui @echo "Building Linux binary..." 
GOOS=linux GOARCH=amd64 go build -ldflags="-X main.commit=${GIT_HASH} -X main.version=local_${GIT_HASH} -X main.date=${BUILD_DATE}" -o $(BUILD_DIR)/$(APP_NAME)-linux-amd64 - GOOS=linux GOARCH=arm64 go build -ldflags="-X main.commit=${GIT_HASH} -X main.version=local_${GIT_HASH} -X main.date=${BUILD_DATE}" -o $(BUILD_DIR)/$(APP_NAME)-linux-arm64 +#GOOS=linux GOARCH=arm64 go build -ldflags="-X main.commit=${GIT_HASH} -X main.version=local_${GIT_HASH} -X main.date=${BUILD_DATE}" -o $(BUILD_DIR)/$(APP_NAME)-linux-arm64 # Build Windows binary windows: ui diff --git a/docker/build-image.sh b/docker/build-image.sh new file mode 100755 index 00000000..7695b604 --- /dev/null +++ b/docker/build-image.sh @@ -0,0 +1,305 @@ +#!/bin/bash +# +# Build script for llama-swap-docker with commit hash pinning +# +# Usage: +# ./build-image.sh --cuda # Build CUDA image +# ./build-image.sh --vulkan # Build Vulkan image +# ./build-image.sh --cuda --no-cache # Build CUDA image without cache +# LLAMA_COMMIT_HASH=abc123 ./build-image.sh --cuda # Override llama.cpp commit +# LLAMA_COMMIT_HASH=b8429 ./build-image.sh --vulkan # Override llama.cpp release tag (vulkan uses prebuilt binaries) +# WHISPER_COMMIT_HASH=def456 ./build-image.sh --vulkan # Override whisper.cpp commit +# SD_COMMIT_HASH=ghi789 ./build-image.sh --cuda # Override stable-diffusion.cpp commit +# +# Features: +# - Auto-detects latest commit hashes from git repos +# - Builds llama-swap from local source code +# - Allows environment variable overrides for reproducible builds +# - Cache-friendly: changing commit hash busts cache appropriately +# - Supports both CUDA and Vulkan backends (requires explicit flag) +# + +set -euo pipefail + +# Parse command line arguments +BACKEND="" +NO_CACHE=false + +if [[ $# -eq 0 ]]; then + echo "Error: No backend specified. Please use --cuda or --vulkan." 
+ echo "" + echo "Usage: ./build-image.sh --cuda|--vulkan [--no-cache]" + echo "" + echo "Options:" + echo " --cuda Build CUDA image (NVIDIA GPUs)" + echo " --vulkan Build Vulkan image (AMD GPUs and compatible hardware)" + echo " --no-cache Force rebuild without using Docker cache" + echo " --help, -h Show this help message" + echo "" + echo "Environment variables:" + echo " DOCKER_IMAGE_TAG Set custom image tag (default: llama-swap:cuda or llama-swap:vulkan)" + echo " LLAMA_COMMIT_HASH Override llama.cpp commit hash" + echo " WHISPER_COMMIT_HASH Override whisper.cpp commit hash" + echo " SD_COMMIT_HASH Override stable-diffusion.cpp commit hash" + exit 1 +fi + +for arg in "$@"; do + case $arg in + --cuda) + BACKEND="cuda" + ;; + --vulkan) + BACKEND="vulkan" + ;; + --no-cache) + NO_CACHE=true + ;; + --help|-h) + echo "Usage: ./build-image.sh --cuda|--vulkan [--no-cache]" + echo "" + echo "Options:" + echo " --cuda Build CUDA image (NVIDIA GPUs)" + echo " --vulkan Build Vulkan image (AMD GPUs and compatible hardware)" + echo " --no-cache Force rebuild without using Docker cache" + echo " --help, -h Show this help message" + echo "" + echo "Environment variables:" + echo " DOCKER_IMAGE_TAG Set custom image tag (default: llama-swap:cuda or llama-swap:vulkan)" + echo " LLAMA_COMMIT_HASH Override llama.cpp commit hash" + echo " WHISPER_COMMIT_HASH Override whisper.cpp commit hash" + echo " SD_COMMIT_HASH Override stable-diffusion.cpp commit hash" + exit 0 + ;; + esac +done + +# Validate backend selection +if [[ -z "$BACKEND" ]]; then + echo "Error: No backend specified. Please use --cuda or --vulkan." 
+ exit 1 +fi + +# Configuration +if [[ -n "${DOCKER_IMAGE_TAG:-}" ]]; then + # User provided a custom tag, use it as-is + : +elif [[ "$BACKEND" == "vulkan" ]]; then + DOCKER_IMAGE_TAG="llama-swap:vulkan" +else + DOCKER_IMAGE_TAG="llama-swap:cuda" +fi +DOCKER_BUILDKIT="${DOCKER_BUILDKIT:-1}" + +# Single unified Dockerfile, backend selected via build arg +DOCKERFILE="Dockerfile" +if [[ "$BACKEND" == "vulkan" ]]; then + echo "Building for: Vulkan (AMD GPUs and compatible hardware)" +else + echo "Building for: CUDA (NVIDIA GPUs)" +fi + +# Git repository URLs +LLAMA_REPO="https://github.com/ggml-org/llama.cpp.git" +WHISPER_REPO="https://github.com/ggml-org/whisper.cpp.git" +SD_REPO="https://github.com/leejet/stable-diffusion.cpp.git" + +# Function to get the latest commit hash from a git repo's default branch +get_latest_commit() { + local repo_url="$1" + local branch="${2:-master}" + + # Try to get the latest commit hash for the specified branch + git ls-remote --heads "${repo_url}" "${branch}" 2>/dev/null | head -1 | cut -f1 +} + +# Function to get the default branch name (master or main) +get_default_branch() { + local repo_url="$1" + + # Check for master first + if git ls-remote --heads "${repo_url}" master &>/dev/null; then + echo "master" + elif git ls-remote --heads "${repo_url}" main &>/dev/null; then + echo "main" + else + echo "master" # fallback + fi +} + +# Function to get the latest release tag from a GitHub repo +get_latest_release_tag() { + local owner_repo="$1" + curl -fsSL "https://api.github.com/repos/${owner_repo}/releases/latest" \ + | grep '"tag_name"' | head -1 | cut -d'"' -f4 +} + +echo "==========================================" +echo "llama-swap-docker Build Script" +echo "==========================================" +echo "" + +# Determine commit hashes / release tags - use env vars or auto-detect +# For vulkan builds, llama and sd use GitHub release tags (prebuilt binaries). 
+# For cuda builds (or whisper on any backend), use git commit hashes. +if [[ -n "${LLAMA_COMMIT_HASH:-}" ]]; then + LLAMA_HASH="${LLAMA_COMMIT_HASH}" + echo "llama.cpp: Using provided version: ${LLAMA_HASH}" +elif [[ "$BACKEND" == "vulkan" ]]; then + LLAMA_HASH=$(get_latest_release_tag "ggml-org/llama.cpp") + if [[ -z "${LLAMA_HASH}" ]]; then + echo "ERROR: Could not determine latest release tag for llama.cpp" >&2 + exit 1 + fi + echo "llama.cpp: Auto-detected latest release tag: ${LLAMA_HASH}" +else + LLAMA_BRANCH=$(get_default_branch "${LLAMA_REPO}") + LLAMA_HASH=$(get_latest_commit "${LLAMA_REPO}" "${LLAMA_BRANCH}") + if [[ -z "${LLAMA_HASH}" ]]; then + echo "ERROR: Could not determine latest commit for llama.cpp" >&2 + exit 1 + fi + echo "llama.cpp: Auto-detected latest commit (${LLAMA_BRANCH}): ${LLAMA_HASH}" +fi + +if [[ -n "${WHISPER_COMMIT_HASH:-}" ]]; then + WHISPER_HASH="${WHISPER_COMMIT_HASH}" + echo "whisper.cpp: Using provided commit hash: ${WHISPER_HASH}" +else + WHISPER_BRANCH=$(get_default_branch "${WHISPER_REPO}") + WHISPER_HASH=$(get_latest_commit "${WHISPER_REPO}" "${WHISPER_BRANCH}") + if [[ -z "${WHISPER_HASH}" ]]; then + echo "ERROR: Could not determine latest commit for whisper.cpp" >&2 + exit 1 + fi + echo "whisper.cpp: Auto-detected latest commit (${WHISPER_BRANCH}): ${WHISPER_HASH}" +fi + +if [[ -n "${SD_COMMIT_HASH:-}" ]]; then + SD_HASH="${SD_COMMIT_HASH}" + echo "stable-diffusion.cpp: Using provided version: ${SD_HASH}" +elif [[ "$BACKEND" == "vulkan" ]]; then + SD_HASH=$(get_latest_release_tag "leejet/stable-diffusion.cpp") + if [[ -z "${SD_HASH}" ]]; then + echo "ERROR: Could not determine latest release tag for stable-diffusion.cpp" >&2 + exit 1 + fi + echo "stable-diffusion.cpp: Auto-detected latest release tag: ${SD_HASH}" +else + SD_BRANCH=$(get_default_branch "${SD_REPO}") + SD_HASH=$(get_latest_commit "${SD_REPO}" "${SD_BRANCH}") + if [[ -z "${SD_HASH}" ]]; then + echo "ERROR: Could not determine latest commit for 
stable-diffusion.cpp" >&2
+        exit 1
+    fi
+    echo "stable-diffusion.cpp: Auto-detected latest commit (${SD_BRANCH}): ${SD_HASH}"
+fi
+
+echo ""
+echo "=========================================="
+echo "Starting Docker build..."
+echo "=========================================="
+echo ""
+
+# Build the Docker image with commit hashes as build args
+# Build context is the repository root (..) so the Dockerfile can access Go source
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
+BUILD_ARGS=(
+    --build-arg "BACKEND=${BACKEND}"
+    --build-arg "LLAMA_COMMIT_HASH=${LLAMA_HASH}"
+    --build-arg "WHISPER_COMMIT_HASH=${WHISPER_HASH}"
+    --build-arg "SD_COMMIT_HASH=${SD_HASH}"
+    -t "${DOCKER_IMAGE_TAG}"
+    -f "${SCRIPT_DIR}/${DOCKERFILE}"
+)
+
+if [[ "$NO_CACHE" == true ]]; then
+    BUILD_ARGS+=(--no-cache)
+    echo "Note: Building without cache"
+fi
+
+# Use docker buildx with a custom builder for parallelism control
+# The legacy DOCKER_BUILDKIT=1 docker build doesn't respect BUILDKIT_MAX_PARALLELISM env var
+# We need to use a custom builder with a buildkitd.toml config file
+BUILDER_NAME="llama-swap-builder"
+
+# Check if our custom builder exists with the right config, create/update if needed
+if ! docker buildx inspect "$BUILDER_NAME" >/dev/null 2>&1; then
+    echo "Creating custom buildx builder with max-parallelism=1..."
+
+    # Create buildkitd.toml config file next to this script, not in the
+    # caller's CWD, so the path is stable regardless of invocation directory
+    cat > "${SCRIPT_DIR}/buildkitd.toml" << 'BUILDKIT_EOF'
+[worker.oci]
+  max-parallelism = 1
+BUILDKIT_EOF
+
+    # Create the builder with the config
+    docker buildx create --name "$BUILDER_NAME" \
+        --driver docker-container \
+        --buildkitd-config "${SCRIPT_DIR}/buildkitd.toml" \
+        --use
+else
+    # Switch to our builder
+    docker buildx use "$BUILDER_NAME"
+fi
+
+echo "Building with sequential stages (one at a time), each using all CPU cores..."
+echo "Using builder: $BUILDER_NAME"
+
+# Use docker buildx build with --load to load the image into Docker
+# The --builder flag ensures we use our custom builder with max-parallelism=1
+# Build context is the repository root so we can access Go source files
+docker buildx build --builder "$BUILDER_NAME" --load "${BUILD_ARGS[@]}" "${REPO_ROOT}"
+
+echo ""
+echo "=========================================="
+echo "Verifying build artifacts..."
+echo "=========================================="
+echo ""
+
+# Verify all expected binaries exist in the image
+MISSING_BINARIES=()
+
+for binary in llama-server llama-cli whisper-server whisper-cli sd-server sd-cli llama-swap; do
+    if ! docker run --rm "${DOCKER_IMAGE_TAG}" which "${binary}" >/dev/null 2>&1; then
+        MISSING_BINARIES+=("${binary}")
+    fi
+done
+
+if [[ ${#MISSING_BINARIES[@]} -gt 0 ]]; then
+    echo "ERROR: Build succeeded but the following binaries are missing from the image:"
+    for binary in "${MISSING_BINARIES[@]}"; do
+        echo "  - ${binary}"
+    done
+    echo ""
+    echo "This usually indicates a build stage failure. Try running with --no-cache flag:"
+    echo "  ./build-image.sh --${BACKEND} --no-cache"
+    exit 1
+fi
+
+echo "All expected binaries verified: llama-server, llama-cli, whisper-server, whisper-cli, sd-server, sd-cli, llama-swap"
+
+echo ""
+echo "=========================================="
+echo "Build complete!"
+echo "==========================================" +echo "" +echo "Image tag: ${DOCKER_IMAGE_TAG}" +echo "" +echo "Built with:" +echo " llama.cpp: ${LLAMA_HASH}" +echo " whisper.cpp: ${WHISPER_HASH}" +echo " stable-diffusion.cpp: ${SD_HASH}" +echo " llama-swap: $(docker run --rm "${DOCKER_IMAGE_TAG}" cat /versions.txt | grep llama-swap | cut -d' ' -f2-)" +echo "" +if [[ "$BACKEND" == "vulkan" ]]; then + echo "Run with:" + echo " docker run -it --rm --device /dev/dri:/dev/dri ${DOCKER_IMAGE_TAG}" + echo "" + echo "Note: For AMD GPUs, you may also need to mount render devices:" + echo " docker run -it --rm --device /dev/dri:/dev/dri --group-add video ${DOCKER_IMAGE_TAG}" +else + echo "Run with:" + echo " docker run -it --rm --gpus all ${DOCKER_IMAGE_TAG}" +fi diff --git a/docker/unified/Dockerfile b/docker/unified/Dockerfile new file mode 100644 index 00000000..8a865a87 --- /dev/null +++ b/docker/unified/Dockerfile @@ -0,0 +1,121 @@ +# Unified multi-stage Dockerfile for CUDA-accelerated AI inference tools +# Includes: llama.cpp, whisper.cpp, stable-diffusion.cpp, llama-swap +# +# Usage: +# docker buildx build -t llama-swap:unified . +# +# Each project has its own install script that handles cloning, building, +# and installing binaries. Build stages are independent for cache efficiency. 
+ +# Builder base: CUDA devel image with build tools +FROM nvidia/cuda:12.4.0-devel-ubuntu22.04 AS builder-base + +ENV DEBIAN_FRONTEND=noninteractive +ENV CMAKE_CUDA_ARCHITECTURES="60;61;75;86;89" +ENV CCACHE_DIR=/ccache +ENV CCACHE_MAXSIZE=2G +ENV PATH="/usr/lib/ccache:${PATH}" + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential cmake git python3 python3-pip libssl-dev \ + curl ca-certificates ccache make wget \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /build + +# Build whisper.cpp (fastest build, run first) +FROM builder-base AS whisper-build +ARG WHISPER_COMMIT_HASH=master +COPY install-whisper.sh /build/ +RUN --mount=type=cache,id=ccache-cuda,target=/ccache \ + --mount=type=cache,id=whisper-cuda,target=/src/whisper.cpp/build \ + bash /build/install-whisper.sh "${WHISPER_COMMIT_HASH}" + +# Build stable-diffusion.cpp +FROM builder-base AS sd-build +ARG SD_COMMIT_HASH=master +COPY install-sd.sh /build/ +RUN --mount=type=cache,id=ccache-cuda,target=/ccache \ + --mount=type=cache,id=sd-cuda,target=/src/stable-diffusion.cpp/build \ + bash /build/install-sd.sh "${SD_COMMIT_HASH}" + +# Build llama.cpp (slowest build, run last) +FROM builder-base AS llama-build +ARG LLAMA_COMMIT_HASH=master +COPY install-llama.sh /build/ +RUN --mount=type=cache,id=ccache-cuda,target=/ccache \ + --mount=type=cache,id=llama-cuda,target=/src/llama.cpp/build \ + bash /build/install-llama.sh "${LLAMA_COMMIT_HASH}" + +# Download llama-swap release binary +FROM builder-base AS llama-swap-download +ARG LS_VERSION=latest +COPY install-llama-swap.sh /build/ +RUN bash /build/install-llama-swap.sh "${LS_VERSION}" + +# Runtime image (no build tooling) +FROM nvidia/cuda:12.4.0-runtime-ubuntu22.04 AS runtime + +ARG LLAMA_COMMIT_HASH=unknown +ARG WHISPER_COMMIT_HASH=unknown +ARG SD_COMMIT_HASH=unknown + +ENV DEBIAN_FRONTEND=noninteractive +ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" +ENV PATH="/usr/local/bin:${PATH}" + +RUN apt-get update && 
apt-get install -y --no-install-recommends \ + libgomp1 python3 python3-pip curl ca-certificates git \ + && rm -rf /var/lib/apt/lists/* + +# CUDA stub drivers for container compatibility +COPY --from=builder-base /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so +COPY --from=builder-base /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 + +RUN pip3 install --no-cache-dir numpy sentencepiece + +WORKDIR /app + +# Copy whisper.cpp binaries and libraries +COPY --from=whisper-build /install/bin/whisper-server /usr/local/bin/ +COPY --from=whisper-build /install/bin/whisper-cli /usr/local/bin/ +COPY --from=whisper-build /install/lib/ /usr/local/lib/ + +# Copy stable-diffusion.cpp binaries and libraries +COPY --from=sd-build /install/bin/sd-server /usr/local/bin/ +COPY --from=sd-build /install/bin/sd-cli /usr/local/bin/ +COPY --from=sd-build /install/lib/ /usr/local/lib/ + +# Copy llama.cpp binaries and libraries +COPY --from=llama-build /install/bin/llama-server /usr/local/bin/ +COPY --from=llama-build /install/bin/llama-cli /usr/local/bin/ +COPY --from=llama-build /install/lib/ /usr/local/lib/ + +# Copy llama-swap binary +COPY --from=llama-swap-download /install/bin/llama-swap /usr/local/bin/ +COPY --from=llama-swap-download /install/llama-swap-version /tmp/ + +RUN ldconfig + +# Convenience symlinks +RUN ln -sf /usr/local/bin/llama-cli /usr/local/bin/llama && \ + ln -sf /usr/local/bin/whisper-cli /usr/local/bin/whisper && \ + ln -sf /usr/local/bin/sd-cli /usr/local/bin/stable-diffusion + +# Validate all binaries exist +RUN set -e && \ + for bin in llama-server llama-cli whisper-server whisper-cli sd-server sd-cli llama-swap; do \ + test -x /usr/local/bin/$bin || { echo "FATAL: $bin missing from /usr/local/bin"; exit 1; }; \ + done && \ + echo "All binaries validated successfully" + +# Version tracking +RUN echo "llama.cpp: ${LLAMA_COMMIT_HASH}" > /versions.txt && \ + echo "whisper.cpp: ${WHISPER_COMMIT_HASH}" >> 
/versions.txt && \ + echo "stable-diffusion.cpp: ${SD_COMMIT_HASH}" >> /versions.txt && \ + echo "llama-swap: $(cat /tmp/llama-swap-version)" >> /versions.txt && \ + echo "backend: cuda" >> /versions.txt && \ + echo "build_timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> /versions.txt + +WORKDIR /models +CMD ["bash"] diff --git a/docker/unified/README.md b/docker/unified/README.md new file mode 100644 index 00000000..01b6a860 --- /dev/null +++ b/docker/unified/README.md @@ -0,0 +1,8 @@ +# Unified Docker Container + +These scripts create a custom llama-swap container that contains: + +- llama-server for LLMs, rerank and embedding model support +- sd-server (stable-diffusion.cpp) for image generation +- whisper.cpp for ASR + diff --git a/docker/unified/build-image.sh b/docker/unified/build-image.sh new file mode 100755 index 00000000..8ac42842 --- /dev/null +++ b/docker/unified/build-image.sh @@ -0,0 +1,202 @@ +#!/bin/bash +# +# Build script for unified CUDA container with version pinning +# +# Usage: +# ./build-image.sh # Build with auto-detected versions +# ./build-image.sh --no-cache # Build without cache +# LLAMA_REF=b1234 ./build-image.sh # Pin llama.cpp to a commit hash +# LLAMA_REF=v1.2.3 ./build-image.sh # Pin llama.cpp to a tag +# LLAMA_REF=my-branch ./build-image.sh # Pin llama.cpp to a branch +# WHISPER_REF=v1.0.0 ./build-image.sh # Pin whisper.cpp to a tag +# SD_REF=master ./build-image.sh # Pin stable-diffusion.cpp to a branch +# LS_VERSION=170 ./build-image.sh # Override llama-swap version +# + +set -euo pipefail + +NO_CACHE=false + +for arg in "$@"; do + case $arg in + --no-cache) + NO_CACHE=true + ;; + --help|-h) + echo "Usage: ./build-image.sh [--no-cache]" + echo "" + echo "Environment variables:" + echo " DOCKER_IMAGE_TAG Set custom image tag (default: llama-swap:unified)" + echo " LLAMA_REF Pin llama.cpp to a commit, tag, or branch" + echo " WHISPER_REF Pin whisper.cpp to a commit, tag, or branch" + echo " SD_REF Pin stable-diffusion.cpp to a commit, 
tag, or branch" + echo " LS_VERSION Override llama-swap version (e.g., '170' or 'latest')" + exit 0 + ;; + esac +done + +DOCKER_IMAGE_TAG="${DOCKER_IMAGE_TAG:-llama-swap:unified}" + +# Git repository URLs +LLAMA_REPO="https://github.com/ggml-org/llama.cpp.git" +WHISPER_REPO="https://github.com/ggml-org/whisper.cpp.git" +SD_REPO="https://github.com/leejet/stable-diffusion.cpp.git" + +# Resolve a git ref (commit hash, tag, or branch) to a full commit hash. +# Requires only: git, network access to the remote. +resolve_ref() { + local repo_url="$1" + local ref="$2" + + # Full 40-char SHA — use as-is + if [[ "${ref}" =~ ^[0-9a-f]{40}$ ]]; then + echo "${ref}" + return + fi + + # Try tag then branch (exact match) + local hash + hash=$(git ls-remote "${repo_url}" "refs/tags/${ref}" "refs/heads/${ref}" 2>/dev/null | head -1 | cut -f1) + if [[ -n "${hash}" ]]; then + echo "${hash}" + return + fi + + # Short hash (7+ chars): scan all refs for a SHA with this prefix + if [[ "${ref}" =~ ^[0-9a-f]{7,}$ ]]; then + hash=$(git ls-remote "${repo_url}" 2>/dev/null | grep "^${ref}" | head -1 | cut -f1) + if [[ -n "${hash}" ]]; then + echo "${hash}" + return + fi + fi + + echo "ERROR: Could not resolve ref '${ref}' for ${repo_url}" >&2 + if [[ "${ref}" =~ ^[0-9a-f]+$ && ${#ref} -lt 7 ]]; then + echo " Short hashes must be at least 7 characters (got ${#ref})." >&2 + else + echo " Tried: tag, branch, git ls-remote prefix match" >&2 + fi + echo " Use a full 40-char SHA, a tag name, a branch name, or a 7+ char short hash." >&2 + return 1 +} + +# Resolve HEAD of a repo without needing to know the default branch name. 
+get_latest_hash() { + git ls-remote "${1}" HEAD 2>/dev/null | head -1 | cut -f1 +} + +echo "==========================================" +echo "llama-swap Unified CUDA Build" +echo "==========================================" +echo "" + +# Resolve llama.cpp ref +if [[ -n "${LLAMA_REF:-}" ]]; then + LLAMA_HASH=$(resolve_ref "${LLAMA_REPO}" "${LLAMA_REF}") || exit 1 + echo "llama.cpp: ${LLAMA_REF} -> ${LLAMA_HASH}" +else + LLAMA_HASH=$(get_latest_hash "${LLAMA_REPO}") + if [[ -z "${LLAMA_HASH}" ]]; then + echo "ERROR: Could not determine latest commit for llama.cpp" >&2 + exit 1 + fi + echo "llama.cpp: latest HEAD: ${LLAMA_HASH}" +fi + +# Resolve whisper.cpp ref +if [[ -n "${WHISPER_REF:-}" ]]; then + WHISPER_HASH=$(resolve_ref "${WHISPER_REPO}" "${WHISPER_REF}") || exit 1 + echo "whisper.cpp: ${WHISPER_REF} -> ${WHISPER_HASH}" +else + WHISPER_HASH=$(get_latest_hash "${WHISPER_REPO}") + if [[ -z "${WHISPER_HASH}" ]]; then + echo "ERROR: Could not determine latest commit for whisper.cpp" >&2 + exit 1 + fi + echo "whisper.cpp: latest HEAD: ${WHISPER_HASH}" +fi + +# Resolve stable-diffusion.cpp ref +if [[ -n "${SD_REF:-}" ]]; then + SD_HASH=$(resolve_ref "${SD_REPO}" "${SD_REF}") || exit 1 + echo "stable-diffusion.cpp: ${SD_REF} -> ${SD_HASH}" +else + SD_HASH=$(get_latest_hash "${SD_REPO}") + if [[ -z "${SD_HASH}" ]]; then + echo "ERROR: Could not determine latest commit for stable-diffusion.cpp" >&2 + exit 1 + fi + echo "stable-diffusion.cpp: latest HEAD: ${SD_HASH}" +fi + +# Resolve llama-swap version +LS_VER="${LS_VERSION:-latest}" +echo "llama-swap: ${LS_VER}" + +echo "" +echo "==========================================" +echo "Starting Docker build..." 
+echo "==========================================" +echo "" + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +BUILD_ARGS=( + --build-arg "LLAMA_COMMIT_HASH=${LLAMA_HASH}" + --build-arg "WHISPER_COMMIT_HASH=${WHISPER_HASH}" + --build-arg "SD_COMMIT_HASH=${SD_HASH}" + --build-arg "LS_VERSION=${LS_VER}" + -t "${DOCKER_IMAGE_TAG}" + -f "${SCRIPT_DIR}/Dockerfile" +) + +if [[ "$NO_CACHE" == true ]]; then + BUILD_ARGS+=(--no-cache) + echo "Note: Building without cache" +fi + +DOCKER_BUILDKIT=1 docker buildx build --load "${BUILD_ARGS[@]}" "${SCRIPT_DIR}" + +echo "" +echo "==========================================" +echo "Verifying build artifacts..." +echo "==========================================" +echo "" + +MISSING_BINARIES=() +for binary in llama-server llama-cli whisper-server whisper-cli sd-server sd-cli llama-swap; do + if ! docker run --rm "${DOCKER_IMAGE_TAG}" which "${binary}" >/dev/null 2>&1; then + MISSING_BINARIES+=("${binary}") + fi +done + +if [[ ${#MISSING_BINARIES[@]} -gt 0 ]]; then + echo "ERROR: Build succeeded but the following binaries are missing:" + for binary in "${MISSING_BINARIES[@]}"; do + echo " - ${binary}" + done + echo "" + echo "Try running with --no-cache flag:" + echo " ./build-image.sh --no-cache" + exit 1 +fi + +echo "All expected binaries verified: llama-server, llama-cli, whisper-server, whisper-cli, sd-server, sd-cli, llama-swap" + +echo "" +echo "==========================================" +echo "Build complete!" 
+echo "==========================================" +echo "" +echo "Image tag: ${DOCKER_IMAGE_TAG}" +echo "" +echo "Built with:" +echo " llama.cpp: ${LLAMA_HASH}" +echo " whisper.cpp: ${WHISPER_HASH}" +echo " stable-diffusion.cpp: ${SD_HASH}" +echo " llama-swap: $(docker run --rm "${DOCKER_IMAGE_TAG}" cat /versions.txt | grep llama-swap | cut -d' ' -f2-)" +echo "" +echo "Run with:" +echo " docker run -it --rm --gpus all ${DOCKER_IMAGE_TAG}" diff --git a/docker/unified/install-llama-swap.sh b/docker/unified/install-llama-swap.sh new file mode 100755 index 00000000..f98aaac8 --- /dev/null +++ b/docker/unified/install-llama-swap.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# Install llama-swap - download latest release binary from GitHub +# Usage: ./install-llama-swap.sh [version] +# version: release version number (e.g., "170") or "latest" (default) +set -e + +VERSION="${1:-latest}" +# Strip leading 'v' prefix so both "198" and "v198" work +VERSION="${VERSION#v}" +REPO="mostlygeek/llama-swap" + +mkdir -p /install/bin + +# Resolve "latest" to actual version number +if [ "$VERSION" = "latest" ]; then + echo "=== Resolving latest llama-swap release ===" + VERSION=$(curl -fsSL "https://api.github.com/repos/${REPO}/releases/latest" \ + | grep '"tag_name"' | head -1 | cut -d'"' -f4 | sed 's/^v//') + if [ -z "$VERSION" ]; then + echo "FATAL: Could not determine latest release version" >&2 + exit 1 + fi + echo "Latest version: ${VERSION}" +fi + +# Download and extract +URL="https://github.com/${REPO}/releases/download/v${VERSION}/llama-swap_${VERSION}_linux_amd64.tar.gz" +echo "=== Downloading llama-swap v${VERSION} ===" +echo "URL: $URL" +curl -fSL -o /tmp/llama-swap.tar.gz "$URL" +tar -xzf /tmp/llama-swap.tar.gz -C /install/bin/ +rm /tmp/llama-swap.tar.gz + +# Validate +if [ ! 
-x "/install/bin/llama-swap" ]; then + echo "FATAL: llama-swap binary not found or not executable" >&2 + ls -la /install/bin/ >&2 + exit 1 +fi + +echo "$VERSION" > /install/llama-swap-version + +echo "=== llama-swap v${VERSION} installed ===" +ls -la /install/bin/llama-swap diff --git a/docker/unified/install-llama.sh b/docker/unified/install-llama.sh new file mode 100755 index 00000000..c5c0847f --- /dev/null +++ b/docker/unified/install-llama.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# Install llama.cpp - clone, build with CUDA, and install binaries +# Usage: ./install-llama.sh +set -e + +COMMIT_HASH="${1:-master}" + +mkdir -p /install/bin /install/lib + +# Clone and checkout (init-based so cache-mounted /src/llama.cpp/build dir doesn't break clone) +echo "=== Cloning llama.cpp at ${COMMIT_HASH} ===" +mkdir -p /src/llama.cpp +cd /src/llama.cpp +if [ ! -d .git ]; then + git init + git remote add origin https://github.com/ggml-org/llama.cpp.git +fi +git fetch --depth=1 origin "${COMMIT_HASH}" +git checkout FETCH_HEAD + +# CUDA cmake flags + llama-specific flags +CMAKE_FLAGS=( + -DGGML_NATIVE=OFF + -DCMAKE_BUILD_TYPE=Release + -DGGML_CUDA=ON + -DGGML_VULKAN=OFF + "-DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES:-60;61;75;86;89}" + "-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler" + "-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda" + "-DCMAKE_SHARED_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda" + -DCMAKE_C_COMPILER_LAUNCHER=ccache + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + -DLLAMA_BUILD_TESTS=OFF +) + +TARGETS=(llama-cli llama-server) + +rm -rf build/CMakeCache.txt build/CMakeFiles 2>/dev/null || true + +echo "=== Building llama.cpp for CUDA ===" +cmake -B build "${CMAKE_FLAGS[@]}" +cmake --build build --config Release -j"$(nproc)" --target "${TARGETS[@]}" + +for bin in "${TARGETS[@]}"; do + if [ ! 
-f "build/bin/$bin" ]; then + echo "FATAL: $bin not found in build/bin/" >&2 + exit 1 + fi + cp "build/bin/$bin" "/install/bin/" +done +find build -name "*.so*" -type f -exec cp {} /install/lib/ \; + +echo "=== llama.cpp build complete ===" +ls -la /install/bin/ diff --git a/docker/unified/install-sd.sh b/docker/unified/install-sd.sh new file mode 100755 index 00000000..48b36c68 --- /dev/null +++ b/docker/unified/install-sd.sh @@ -0,0 +1,56 @@ +#!/bin/bash +# Install stable-diffusion.cpp - clone and build with CUDA, install binaries and library +# Usage: ./install-sd.sh +set -e + +COMMIT_HASH="${1:-master}" + +mkdir -p /install/bin /install/lib + +# Clone and checkout (init-based so cache-mounted /src/stable-diffusion.cpp/build dir doesn't break clone) +echo "=== Cloning stable-diffusion.cpp at ${COMMIT_HASH} ===" +mkdir -p /src/stable-diffusion.cpp +cd /src/stable-diffusion.cpp +if [ ! -d .git ]; then + git init + git remote add origin https://github.com/leejet/stable-diffusion.cpp.git +fi +git fetch --depth=1 origin "${COMMIT_HASH}" +git checkout FETCH_HEAD +git submodule update --init --recursive --depth=1 + +# CUDA cmake flags + sd-specific flags +CMAKE_FLAGS=( + -DGGML_NATIVE=OFF + -DCMAKE_BUILD_TYPE=Release + -DGGML_CUDA=ON + -DGGML_VULKAN=OFF + "-DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES:-60;61;75;86;89}" + "-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler" + "-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda" + "-DCMAKE_SHARED_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda" + -DCMAKE_C_COMPILER_LAUNCHER=ccache + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + -DSD_BUILD_EXAMPLES=ON + -DSD_CUDA=ON +) + +TARGETS=(stable-diffusion sd-cli sd-server) + +rm -rf build/CMakeCache.txt build/CMakeFiles 2>/dev/null || true + +echo "=== Building stable-diffusion.cpp for CUDA ===" +cmake -B build "${CMAKE_FLAGS[@]}" +cmake --build build --config Release -j"$(nproc)" --target "${TARGETS[@]}" + +for bin in sd-cli sd-server; do + 
if [ ! -f "build/bin/$bin" ]; then + echo "FATAL: $bin not found in build/bin/" >&2 + exit 1 + fi + cp "build/bin/$bin" "/install/bin/" +done +find build -name "*.so*" -type f -exec cp {} /install/lib/ \; + +echo "=== stable-diffusion.cpp build complete ===" +ls -la /install/bin/ /install/lib/ diff --git a/docker/unified/install-whisper.sh b/docker/unified/install-whisper.sh new file mode 100755 index 00000000..da18b096 --- /dev/null +++ b/docker/unified/install-whisper.sh @@ -0,0 +1,53 @@ +#!/bin/bash +# Install whisper.cpp - clone, build with CUDA, and install binaries +# Usage: ./install-whisper.sh +set -e + +COMMIT_HASH="${1:-master}" + +mkdir -p /install/bin /install/lib + +# Clone and checkout (init-based so cache-mounted /src/whisper.cpp/build dir doesn't break clone) +echo "=== Cloning whisper.cpp at ${COMMIT_HASH} ===" +mkdir -p /src/whisper.cpp +cd /src/whisper.cpp +if [ ! -d .git ]; then + git init + git remote add origin https://github.com/ggml-org/whisper.cpp.git +fi +git fetch --depth=1 origin "${COMMIT_HASH}" +git checkout FETCH_HEAD + +# CUDA cmake flags +CMAKE_FLAGS=( + -DGGML_NATIVE=OFF + -DCMAKE_BUILD_TYPE=Release + -DGGML_CUDA=ON + -DGGML_VULKAN=OFF + "-DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES:-60;61;75;86;89}" + "-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler" + "-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda" + "-DCMAKE_SHARED_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda" + -DCMAKE_C_COMPILER_LAUNCHER=ccache + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache +) + +TARGETS=(whisper-cli whisper-server) + +rm -rf build/CMakeCache.txt build/CMakeFiles 2>/dev/null || true + +echo "=== Building whisper.cpp for CUDA ===" +cmake -B build "${CMAKE_FLAGS[@]}" +cmake --build build --config Release -j"$(nproc)" --target "${TARGETS[@]}" + +for bin in "${TARGETS[@]}"; do + if [ ! 
-f "build/bin/$bin" ]; then + echo "FATAL: $bin not found in build/bin/" >&2 + exit 1 + fi + cp "build/bin/$bin" "/install/bin/" +done +find build -name "*.so*" -type f -exec cp {} /install/lib/ \; + +echo "=== whisper.cpp build complete ===" +ls -la /install/bin/