17 changes: 11 additions & 6 deletions .github/workflows/unified-docker.yml
@@ -30,6 +30,10 @@ permissions:
jobs:
build:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
backend: [cuda, vulkan]
steps:
- name: Checkout code
uses: actions/checkout@v4
@@ -61,24 +65,25 @@ jobs:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Build unified Docker image
- name: Build unified Docker image (${{ matrix.backend }})
env:
LLAMA_REF: ${{ inputs.llama_cpp_ref || 'master' }}
WHISPER_REF: ${{ inputs.whisper_ref || 'master' }}
SD_REF: ${{ inputs.sd_ref || 'master' }}
LS_VERSION: ${{ inputs.llama_swap_version || 'main' }}
DOCKER_IMAGE_TAG: ghcr.io/mostlygeek/llama-swap:unified
DOCKER_IMAGE_TAG: ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}
# When running under act, use the local builder that has warm ccache.
# On GitHub Actions, BUILDX_BUILDER is unset so docker uses the builder
# created by setup-buildx-action above.
BUILDX_BUILDER: ${{ env.ACT == 'true' && 'llama-swap-builder' || '' }}
run: |
chmod +x docker/unified/build-image.sh
docker/unified/build-image.sh
docker/unified/build-image.sh --${{ matrix.backend }}

- name: Push to GitHub Container Registry
if: ${{ !env.ACT }}
run: |
docker push ghcr.io/mostlygeek/llama-swap:unified
docker tag ghcr.io/mostlygeek/llama-swap:unified ghcr.io/mostlygeek/llama-swap:unified-$(date -u +%Y-%m-%d)
docker push ghcr.io/mostlygeek/llama-swap:unified-$(date -u +%Y-%m-%d)
docker push ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}
DATE_TAG=$(date -u +%Y-%m-%d)
docker tag ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }} ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}-${DATE_TAG}
docker push ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}-${DATE_TAG}
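
With the matrix in place, each run publishes a moving tag plus a date-stamped tag per backend. A quick sketch of pulling the results, assuming the images are public on ghcr.io (the date tag shown is illustrative):

    docker pull ghcr.io/mostlygeek/llama-swap:unified-cuda
    docker pull ghcr.io/mostlygeek/llama-swap:unified-vulkan
    docker pull ghcr.io/mostlygeek/llama-swap:unified-cuda-2024-01-15
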
107 changes: 81 additions & 26 deletions docker/unified/Dockerfile
@@ -1,14 +1,18 @@
# Unified multi-stage Dockerfile for CUDA-accelerated AI inference tools
# Includes: llama.cpp, whisper.cpp, stable-diffusion.cpp, llama-swap
# Unified multi-stage Dockerfile for AI inference tools
# Supports CUDA and Vulkan backends via BACKEND build arg
#
# Usage:
# docker buildx build -t llama-swap:unified .
# docker buildx build --build-arg BACKEND=cuda -t llama-swap:unified-cuda .
# docker buildx build --build-arg BACKEND=vulkan -t llama-swap:unified-vulkan .
#
# Each project has its own install script that handles cloning, building,
# and installing binaries. Build stages are independent for cache efficiency.

# Builder base: CUDA devel image with build tools
FROM nvidia/cuda:12.4.0-devel-ubuntu22.04 AS builder-base
ARG BACKEND=cuda

# ── Builder bases ──────────────────────────────────────────────────────

FROM nvidia/cuda:12.4.0-devel-ubuntu22.04 AS builder-base-cuda

ENV DEBIAN_FRONTEND=noninteractive
ENV CMAKE_CUDA_ARCHITECTURES="60;61;75;86;89"
@@ -23,42 +27,72 @@ RUN apt-get update && apt-get install -y --no-install-recommends \

WORKDIR /build

# Build whisper.cpp (fastest build, run first)
# ──

FROM ubuntu:22.04 AS builder-base-vulkan

ENV DEBIAN_FRONTEND=noninteractive
ENV CCACHE_DIR=/ccache
ENV CCACHE_MAXSIZE=2G
ENV PATH="/usr/lib/ccache:${PATH}"

RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential cmake git python3 python3-pip libssl-dev \
curl ca-certificates ccache make wget software-properties-common \
&& rm -rf /var/lib/apt/lists/*

# Install LunarG Vulkan SDK (Ubuntu 22.04 repo headers are too old)
RUN wget -qO /etc/apt/trusted.gpg.d/lunarg.asc https://packages.lunarg.com/lunarg-signing-key-pub.asc \
&& echo "deb https://packages.lunarg.com/vulkan jammy main" > /etc/apt/sources.list.d/lunarg-vulkan.list \
&& apt-get update && apt-get install -y --no-install-recommends vulkan-sdk \
&& rm -rf /var/lib/apt/lists/*

WORKDIR /build

# ── Select builder base by BACKEND ────────────────────────────────────

FROM builder-base-${BACKEND} AS builder-base

# ── Build whisper.cpp (fastest build, run first) ──────────────────────

FROM builder-base AS whisper-build
ARG BACKEND=cuda
ARG WHISPER_COMMIT_HASH=master
COPY install-whisper.sh /build/
RUN --mount=type=cache,id=ccache-cuda,target=/ccache \
--mount=type=cache,id=whisper-cuda,target=/src/whisper.cpp/build \
bash /build/install-whisper.sh "${WHISPER_COMMIT_HASH}"
RUN --mount=type=cache,id=ccache-${BACKEND},target=/ccache \
--mount=type=cache,id=whisper-${BACKEND},target=/src/whisper.cpp/build \
BACKEND=${BACKEND} bash /build/install-whisper.sh "${WHISPER_COMMIT_HASH}"

# ── Build stable-diffusion.cpp ────────────────────────────────────────

# Build stable-diffusion.cpp
FROM builder-base AS sd-build
ARG BACKEND=cuda
ARG SD_COMMIT_HASH=master
COPY install-sd.sh /build/
RUN --mount=type=cache,id=ccache-cuda,target=/ccache \
--mount=type=cache,id=sd-cuda,target=/src/stable-diffusion.cpp/build \
bash /build/install-sd.sh "${SD_COMMIT_HASH}"
RUN --mount=type=cache,id=ccache-${BACKEND},target=/ccache \
--mount=type=cache,id=sd-${BACKEND},target=/src/stable-diffusion.cpp/build \
BACKEND=${BACKEND} bash /build/install-sd.sh "${SD_COMMIT_HASH}"

# ── Build llama.cpp (slowest build, run last) ─────────────────────────

# Build llama.cpp (slowest build, run last)
FROM builder-base AS llama-build
ARG BACKEND=cuda
ARG LLAMA_COMMIT_HASH=master
COPY install-llama.sh /build/
RUN --mount=type=cache,id=ccache-cuda,target=/ccache \
--mount=type=cache,id=llama-cuda,target=/src/llama.cpp/build \
bash /build/install-llama.sh "${LLAMA_COMMIT_HASH}"
RUN --mount=type=cache,id=ccache-${BACKEND},target=/ccache \
--mount=type=cache,id=llama-${BACKEND},target=/src/llama.cpp/build \
BACKEND=${BACKEND} bash /build/install-llama.sh "${LLAMA_COMMIT_HASH}"

# ── Download llama-swap release binary ────────────────────────────────

# Download llama-swap release binary
FROM builder-base AS llama-swap-download
ARG LS_VERSION=latest
COPY install-llama-swap.sh /build/
RUN bash /build/install-llama-swap.sh "${LS_VERSION}"

# Runtime image (no build tooling)
FROM nvidia/cuda:12.4.0-runtime-ubuntu22.04 AS runtime
# ── Runtime bases ─────────────────────────────────────────────────────

ARG LLAMA_COMMIT_HASH=unknown
ARG WHISPER_COMMIT_HASH=unknown
ARG SD_COMMIT_HASH=unknown
FROM nvidia/cuda:12.4.0-runtime-ubuntu22.04 AS runtime-cuda

ENV DEBIAN_FRONTEND=noninteractive
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
@@ -69,8 +103,29 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
&& rm -rf /var/lib/apt/lists/*

# CUDA stub drivers for container compatibility
COPY --from=builder-base /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so
COPY --from=builder-base /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1
COPY --from=builder-base-cuda /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so
COPY --from=builder-base-cuda /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1

# ──

FROM ubuntu:22.04 AS runtime-vulkan

ENV DEBIAN_FRONTEND=noninteractive
ENV PATH="/usr/local/bin:${PATH}"

RUN apt-get update && apt-get install -y --no-install-recommends \
libgomp1 libvulkan1 mesa-vulkan-drivers \
python3 python3-pip curl ca-certificates git \
&& rm -rf /var/lib/apt/lists/*

# ── Select runtime base by BACKEND ────────────────────────────────────

FROM runtime-${BACKEND} AS runtime

ARG BACKEND=cuda
ARG LLAMA_COMMIT_HASH=unknown
ARG WHISPER_COMMIT_HASH=unknown
ARG SD_COMMIT_HASH=unknown

RUN pip3 install --no-cache-dir numpy sentencepiece

@@ -109,7 +164,7 @@ RUN echo "llama.cpp: ${LLAMA_COMMIT_HASH}" > /versions.txt && \
echo "whisper.cpp: ${WHISPER_COMMIT_HASH}" >> /versions.txt && \
echo "stable-diffusion.cpp: ${SD_COMMIT_HASH}" >> /versions.txt && \
echo "llama-swap: $(cat /tmp/llama-swap-version)" >> /versions.txt && \
echo "backend: cuda" >> /versions.txt && \
echo "backend: ${BACKEND}" >> /versions.txt && \
echo "build_timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> /versions.txt

WORKDIR /models
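
The FROM builder-base-${BACKEND} and FROM runtime-${BACKEND} lines resolve the stage name at build time from the global BACKEND arg, so one Dockerfile serves both backends. A minimal sketch of exercising both paths, assuming the build context is docker/unified (where the install scripts live):

    docker buildx build --build-arg BACKEND=cuda -t llama-swap:unified-cuda docker/unified
    docker buildx build --build-arg BACKEND=vulkan -t llama-swap:unified-vulkan docker/unified
    # /versions.txt records which backend was baked in
    docker run --rm --entrypoint cat llama-swap:unified-vulkan /versions.txt
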
63 changes: 46 additions & 17 deletions docker/unified/build-image.sh
@@ -1,32 +1,45 @@
#!/bin/bash
#
# Build script for unified CUDA container with version pinning
# Build script for unified container with version pinning
#
# Usage:
# ./build-image.sh # Build with auto-detected versions
# ./build-image.sh --no-cache # Build without cache
# LLAMA_REF=b1234 ./build-image.sh # Pin llama.cpp to a commit hash
# LLAMA_REF=v1.2.3 ./build-image.sh # Pin llama.cpp to a tag
# LLAMA_REF=my-branch ./build-image.sh # Pin llama.cpp to a branch
# WHISPER_REF=v1.0.0 ./build-image.sh # Pin whisper.cpp to a tag
# SD_REF=master ./build-image.sh # Pin stable-diffusion.cpp to a branch
# LS_VERSION=170 ./build-image.sh # Override llama-swap version
# ./build-image.sh --cuda # Build CUDA image
# ./build-image.sh --vulkan # Build Vulkan image
# ./build-image.sh --cuda --no-cache # Build without cache
# LLAMA_REF=b1234 ./build-image.sh --vulkan # Pin llama.cpp to a commit hash
# LLAMA_REF=v1.2.3 ./build-image.sh --cuda # Pin llama.cpp to a tag
# WHISPER_REF=v1.0.0 ./build-image.sh --vulkan # Pin whisper.cpp to a tag
# SD_REF=master ./build-image.sh --cuda # Pin stable-diffusion.cpp to a branch
# LS_VERSION=170 ./build-image.sh --cuda # Override llama-swap version
#

set -euo pipefail

BACKEND=""
NO_CACHE=false

for arg in "$@"; do
case $arg in
--cuda)
BACKEND="cuda"
;;
--vulkan)
BACKEND="vulkan"
;;
--no-cache)
NO_CACHE=true
;;
--help|-h)
echo "Usage: ./build-image.sh [--no-cache]"
echo "Usage: ./build-image.sh --cuda|--vulkan [--no-cache]"
echo ""
echo "Options:"
echo " --cuda Build CUDA image (NVIDIA GPUs)"
echo " --vulkan Build Vulkan image (AMD GPUs and compatible hardware)"
echo " --no-cache Force rebuild without using Docker cache"
echo " --help, -h Show this help message"
echo ""
echo "Environment variables:"
echo " DOCKER_IMAGE_TAG Set custom image tag (default: llama-swap:unified)"
echo " DOCKER_IMAGE_TAG Set custom image tag (default: llama-swap:unified-cuda or llama-swap:unified-vulkan)"
echo " LLAMA_REF Pin llama.cpp to a commit, tag, or branch"
echo " WHISPER_REF Pin whisper.cpp to a commit, tag, or branch"
echo " SD_REF Pin stable-diffusion.cpp to a commit, tag, or branch"
@@ -36,7 +49,14 @@ for arg in "$@"; do
esac
done

DOCKER_IMAGE_TAG="${DOCKER_IMAGE_TAG:-llama-swap:unified}"
if [[ -z "$BACKEND" ]]; then
echo "Error: No backend specified. Please use --cuda or --vulkan."
echo ""
echo "Usage: ./build-image.sh --cuda|--vulkan [--no-cache]"
exit 1
fi

DOCKER_IMAGE_TAG="${DOCKER_IMAGE_TAG:-llama-swap:unified-${BACKEND}}"

# Git repository URLs
LLAMA_REPO="https://github.com/ggml-org/llama.cpp.git"
@@ -89,7 +109,7 @@ get_latest_hash() {
}

echo "=========================================="
echo "llama-swap Unified CUDA Build"
echo "llama-swap Unified Build (${BACKEND})"
echo "=========================================="
echo ""

@@ -154,6 +174,7 @@
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

BUILD_ARGS=(
--build-arg "BACKEND=${BACKEND}"
--build-arg "LLAMA_COMMIT_HASH=${LLAMA_HASH}"
--build-arg "WHISPER_COMMIT_HASH=${WHISPER_HASH}"
--build-arg "SD_COMMIT_HASH=${SD_HASH}"
Expand All @@ -166,7 +187,7 @@ if [[ "$NO_CACHE" == true ]]; then
BUILD_ARGS+=(--no-cache)
echo "Note: Building without cache"
elif [[ "${GITHUB_ACTIONS:-}" == "true" && "${ACT:-}" != "true" ]]; then
CACHE_REF="ghcr.io/mostlygeek/llama-swap:unified-cache"
CACHE_REF="ghcr.io/mostlygeek/llama-swap:unified-${BACKEND}-cache"
BUILD_ARGS+=(
--cache-from "type=registry,ref=${CACHE_REF}"
--cache-to "type=registry,ref=${CACHE_REF},mode=max"
Expand Down Expand Up @@ -196,7 +217,7 @@ if [[ ${#MISSING_BINARIES[@]} -gt 0 ]]; then
done
echo ""
echo "Try running with --no-cache flag:"
echo " ./build-image.sh --no-cache"
echo " ./build-image.sh --${BACKEND} --no-cache"
exit 1
fi

@@ -215,5 +236,13 @@ echo " whisper.cpp: ${WHISPER_HASH}"
echo " stable-diffusion.cpp: ${SD_HASH}"
echo " llama-swap: $(docker run --rm --entrypoint cat "${DOCKER_IMAGE_TAG}" /versions.txt | grep llama-swap | cut -d' ' -f2-)"
echo ""
echo "Run with:"
echo " docker run -it --rm --gpus all ${DOCKER_IMAGE_TAG}"
if [[ "$BACKEND" == "vulkan" ]]; then
echo "Run with:"
echo " docker run -it --rm --device /dev/dri:/dev/dri ${DOCKER_IMAGE_TAG}"
echo ""
echo "Note: For AMD GPUs, you may also need:"
echo " docker run -it --rm --device /dev/dri:/dev/dri --group-add video ${DOCKER_IMAGE_TAG}"
else
echo "Run with:"
echo " docker run -it --rm --gpus all ${DOCKER_IMAGE_TAG}"
fi
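
A usage sketch for the new flag handling (the refs below are hypothetical pins, not versions from this PR):

    # Build the Vulkan image against pinned upstream refs
    LLAMA_REF=b4521 WHISPER_REF=v1.7.4 ./build-image.sh --vulkan
    # Force a clean CUDA rebuild
    ./build-image.sh --cuda --no-cache
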
31 changes: 21 additions & 10 deletions docker/unified/install-llama.sh
@@ -1,9 +1,10 @@
#!/bin/bash
# Install llama.cpp - clone, build with CUDA, and install binaries
# Usage: ./install-llama.sh <commit_hash>
# Install llama.cpp - clone, build, and install binaries
# Usage: BACKEND=cuda|vulkan ./install-llama.sh <commit_hash>
set -e

COMMIT_HASH="${1:-master}"
BACKEND="${BACKEND:-cuda}"

mkdir -p /install/bin /install/lib

@@ -18,26 +19,36 @@ fi
git fetch --depth=1 origin "${COMMIT_HASH}"
git checkout FETCH_HEAD

# CUDA cmake flags + llama-specific flags
# Common cmake flags
CMAKE_FLAGS=(
-DGGML_NATIVE=OFF
-DCMAKE_BUILD_TYPE=Release
-DGGML_CUDA=ON
-DGGML_VULKAN=OFF
"-DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES:-60;61;75;86;89}"
"-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler"
"-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda"
"-DCMAKE_SHARED_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda"
-DCMAKE_C_COMPILER_LAUNCHER=ccache
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
-DLLAMA_BUILD_TESTS=OFF
)

if [ "$BACKEND" = "cuda" ]; then
CMAKE_FLAGS+=(
-DGGML_CUDA=ON
-DGGML_VULKAN=OFF
"-DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES:-60;61;75;86;89}"
"-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler"
"-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda"
"-DCMAKE_SHARED_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda"
)
elif [ "$BACKEND" = "vulkan" ]; then
CMAKE_FLAGS+=(
-DGGML_CUDA=OFF
-DGGML_VULKAN=ON
)
fi

TARGETS=(llama-cli llama-server)

rm -rf build/CMakeCache.txt build/CMakeFiles 2>/dev/null || true

echo "=== Building llama.cpp for CUDA ==="
echo "=== Building llama.cpp for ${BACKEND} ==="
cmake -B build "${CMAKE_FLAGS[@]}"
cmake --build build --config Release -j"$(nproc)" --target "${TARGETS[@]}"

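
install-llama.sh now keys its cmake flags off a BACKEND environment variable instead of hard-coding CUDA. An invocation sketch as the Dockerfile build stages call it (the b4521 ref is a hypothetical pin):

    BACKEND=vulkan bash /build/install-llama.sh master
    BACKEND=cuda bash /build/install-llama.sh b4521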