From 6969c5276329dd350d7e4f1fdd46ccd1ed5f4107 Mon Sep 17 00:00:00 2001 From: Benson Wong Date: Mon, 16 Mar 2026 23:54:32 +0000 Subject: [PATCH 1/4] checkpoint --- docker/.gitignore | 2 + docker/AGENTS.md | 17 +++ docker/Dockerfile.cuda | 235 ++++++++++++++++++++++++++++++ docker/Dockerfile.vulkan | 241 +++++++++++++++++++++++++++++++ docker/build-image.sh | 300 +++++++++++++++++++++++++++++++++++++++ docker/test-binaries.sh | 120 ++++++++++++++++ 6 files changed, 915 insertions(+) create mode 100644 docker/.gitignore create mode 100644 docker/AGENTS.md create mode 100644 docker/Dockerfile.cuda create mode 100644 docker/Dockerfile.vulkan create mode 100755 docker/build-image.sh create mode 100755 docker/test-binaries.sh diff --git a/docker/.gitignore b/docker/.gitignore new file mode 100644 index 00000000..b7697f06 --- /dev/null +++ b/docker/.gitignore @@ -0,0 +1,2 @@ +# BuildKit configuration file (generated by build-image.sh) +buildkitd.toml diff --git a/docker/AGENTS.md b/docker/AGENTS.md new file mode 100644 index 00000000..8c82679e --- /dev/null +++ b/docker/AGENTS.md @@ -0,0 +1,17 @@ +- you are working in my VM sandbox. It is safe to use sudo. 
+- use or install whatever tools you need to complete your goal +- use DOCKER_BUILDKIT=1 docker build -t llama-swap:optimized + - DOCKER_BUILDKIT=1 is important to use the caching +- ALWAYS send notifications to get the user's attention +- when running `./build-image.sh`, use a 2-hour (7200000ms) timeout minimum as CUDA builds take 60-120+ minutes to compile for multiple architectures + +# Notifications + +ALWAYS send notifications to keep the user informed: + +- When starting or finishing a job +- For progress updates on long-running tasks (especially Docker builds) +- For todo list progress updates (when items start/complete) +- When you need feedback or to elicit information from the user +- use pushover.sh , example: `pushover.sh "notification to send"` + diff --git a/docker/Dockerfile.cuda b/docker/Dockerfile.cuda new file mode 100644 index 00000000..bebcb771 --- /dev/null +++ b/docker/Dockerfile.cuda @@ -0,0 +1,235 @@ +# Multi-stage Dockerfile for CUDA-optimized AI inference tools +# Includes: llama.cpp, whisper.cpp, stable-diffusion.cpp +# +# CUDA architectures targeting consumer/prosumer GPUs from Pascal through Blackwell: +# sm_60 - Pascal (HPC): Tesla P100 (GP100) - full FP16 2x throughput, HBM2 +# sm_61 - Pascal (consumer): Tesla P40, GTX 1080/1070/1060, Titan Xp (GP102/104/106) +# sm_75 - Turing: RTX 2080/2070/2060, Quadro RTX 6000, Tesla T4 - 2nd gen Tensor Cores, INT8/INT4 +# sm_86 - Ampere (consumer): RTX 3090/3080/3070/3060, RTX A6000 - 3rd gen Tensor Cores, BF16/TF32, 128 FP32 cores/SM +# sm_89 - Ada Lovelace: RTX 4090/4080/4070/4060, RTX 6000 Ada - FP8 support, 4th gen Tensor Cores +# sm_120 - Blackwell (consumer): RTX 5090/5080/5070 - requires CUDA 12.8+ (not included, base image is 12.4) +# +# Build with BuildKit for cache mounts: +# DOCKER_BUILDKIT=1 docker build -t llama-swap:latest . 
+# +# Features: +# - Separate git clone stages for independent caching +# - ccache support for C++ compilation caching +# - BuildKit cache mounts for cmake incremental builds +# - Independent project rebuilds + +# ============================================================================ +# Stage 1: Base Builder with Common Dependencies +# ============================================================================ +FROM nvidia/cuda:12.4.0-devel-ubuntu22.04 AS builder-base + +ENV DEBIAN_FRONTEND=noninteractive +ENV CMAKE_CUDA_ARCHITECTURES="60;61;75;86;89" +ENV CCACHE_DIR=/ccache +ENV CCACHE_MAXSIZE=2G +ENV PATH="/usr/lib/ccache:${PATH}" + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + cmake \ + git \ + python3 \ + python3-pip \ + libssl-dev \ + curl \ + ca-certificates \ + ccache \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /src + +# ============================================================================ +# Stage 2: Source Cloning (Cached Independently) +# ============================================================================ + +FROM builder-base AS llama-source +ARG LLAMA_COMMIT_HASH=master +RUN git clone --filter=blob:none --no-checkout https://github.com/ggml-org/llama.cpp.git /src/llama.cpp && \ + cd /src/llama.cpp && \ + git fetch --depth=1 origin ${LLAMA_COMMIT_HASH} && \ + git checkout FETCH_HEAD + +FROM builder-base AS whisper-source +ARG WHISPER_COMMIT_HASH=master +RUN git clone --filter=blob:none --no-checkout https://github.com/ggml-org/whisper.cpp.git /src/whisper.cpp && \ + cd /src/whisper.cpp && \ + git fetch --depth=1 origin ${WHISPER_COMMIT_HASH} && \ + git checkout FETCH_HEAD + +FROM builder-base AS sd-source +ARG SD_COMMIT_HASH=master +RUN git clone --filter=blob:none --no-checkout https://github.com/leejet/stable-diffusion.cpp.git /src/stable-diffusion.cpp && \ + cd /src/stable-diffusion.cpp && \ + git fetch --depth=1 origin ${SD_COMMIT_HASH} && \ + git checkout FETCH_HEAD && \ + git 
submodule update --init --recursive --depth=1 + +# ============================================================================ +# Stage 3: Individual Project Builds with Cache Mounts +# ============================================================================ + +# Build whisper.cpp (first - quick to build) +FROM builder-base AS whisper-build +COPY --from=whisper-source /src/whisper.cpp /build/whisper.cpp +WORKDIR /build/whisper.cpp +RUN --mount=type=cache,target=/ccache \ + --mount=type=cache,target=/build/whisper.cpp/build \ + cmake -B build \ + -DGGML_NATIVE=OFF \ + -DGGML_CUDA=ON \ + -DGGML_VULKAN=OFF \ + -DCMAKE_CUDA_ARCHITECTURES="60;61;75;86;89" \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_CUDA_FLAGS="-allow-unsupported-compiler" \ + -DCMAKE_EXE_LINKER_FLAGS="-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda" \ + -DCMAKE_SHARED_LINKER_FLAGS="-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda" \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache && \ + cmake --build build --config Release -j$(nproc) --target whisper-cli whisper-server && \ + mkdir -p /install/bin /install/lib && \ + cp -v build/bin/whisper-cli build/bin/whisper-server /install/bin/ 2>/dev/null || true && \ + find build -name "*.so*" -type f -exec cp -v {} /install/lib/ \; + +# Build stable-diffusion.cpp (second - medium build time) +FROM builder-base AS sd-build +COPY --from=sd-source /src/stable-diffusion.cpp /build/stable-diffusion.cpp +WORKDIR /build/stable-diffusion.cpp +RUN --mount=type=cache,target=/ccache \ + --mount=type=cache,target=/build/stable-diffusion.cpp/build \ + set -e && \ + rm -rf build/CMakeCache.txt build/CMakeFiles 2>/dev/null || true && \ + cmake -B build \ + -DSD_CUDA=ON \ + -DGGML_CUDA=ON \ + -DGGML_NATIVE=OFF \ + -DGGML_VULKAN=OFF \ + -DCMAKE_CUDA_ARCHITECTURES="60;61;75;86;89" \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_CUDA_FLAGS="-allow-unsupported-compiler" \ + -DCMAKE_EXE_LINKER_FLAGS="-Wl,-rpath-link,/usr/local/cuda/lib64/stubs 
-lcuda" \ + -DCMAKE_SHARED_LINKER_FLAGS="-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda" \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache && \ + cmake --build build --config Release -j$(nproc) --target sd-cli sd-server && \ + mkdir -p /install/bin /install/lib && \ + cp -v build/bin/sd-cli build/bin/sd-server /install/bin/ && \ + find build -name "*.so*" -type f -exec cp -v {} /install/lib/ \; + +# Build llama.cpp (last - longest build time) +FROM builder-base AS llama-build +COPY --from=llama-source /src/llama.cpp /build/llama.cpp +WORKDIR /build/llama.cpp +RUN --mount=type=cache,target=/ccache \ + --mount=type=cache,target=/build/llama.cpp/build \ + cmake -B build \ + -DGGML_NATIVE=OFF \ + -DGGML_CUDA=ON \ + -DGGML_VULKAN=OFF \ + -DCMAKE_CUDA_ARCHITECTURES="60;61;75;86;89" \ + -DLLAMA_BUILD_TESTS=OFF \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_CUDA_FLAGS="-allow-unsupported-compiler" \ + -DCMAKE_EXE_LINKER_FLAGS="-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda" \ + -DCMAKE_SHARED_LINKER_FLAGS="-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda" \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache && \ + cmake --build build --config Release -j$(nproc) --target llama-cli llama-server && \ + # Copy outputs to install directory for easy extraction + mkdir -p /install/bin /install/lib /install/examples && \ + cp -v build/bin/llama-cli build/bin/llama-server /install/bin/ 2>/dev/null || true && \ + find build -name "*.so*" -type f -exec cp -v {} /install/lib/ \; && \ + cp -vr examples/* /install/examples/ 2>/dev/null || true + +# ============================================================================ +# Stage 4: Runtime Stage +# ============================================================================ +FROM nvidia/cuda:12.4.0-runtime-ubuntu22.04 AS runtime + +ARG LLAMA_SWAP_VERSION=v198 +ARG LLAMA_COMMIT_HASH=unknown +ARG WHISPER_COMMIT_HASH=unknown +ARG SD_COMMIT_HASH=unknown +ENV 
DEBIAN_FRONTEND=noninteractive +ENV PATH="/usr/local/bin:${PATH}" +ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" + +RUN apt-get update && apt-get install -y --no-install-recommends \ + libgomp1 \ + python3 \ + python3-pip \ + curl \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +# Download and install llama-swap binary +RUN LLAMA_SWAP_VER_NUM=$(echo "${LLAMA_SWAP_VERSION}" | sed 's/^v//') && \ + curl -L -o /tmp/llama-swap.tar.gz \ + "https://github.com/mostlygeek/llama-swap/releases/download/${LLAMA_SWAP_VERSION}/llama-swap_${LLAMA_SWAP_VER_NUM}_linux_amd64.tar.gz" && \ + tar -xzf /tmp/llama-swap.tar.gz -C /usr/local/bin/ && \ + rm /tmp/llama-swap.tar.gz && \ + chmod +x /usr/local/bin/llama-swap + +# Copy libcuda stubs for CPU-only testing (not in ldconfig path to avoid conflicts with real drivers) +# Real NVIDIA drivers will be used when available; stubs are fallback for testing on CPU-only hosts +COPY --from=builder-base /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so +COPY --from=builder-base /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 + +# Copy test script for verifying binaries work correctly +COPY test-binaries.sh /usr/local/bin/ +RUN chmod +x /usr/local/bin/test-binaries.sh + +RUN pip3 install --no-cache-dir numpy sentencepiece + +WORKDIR /app + +# Copy specific binaries from each build stage +# Copy only specific binaries from each build stage +COPY --from=llama-build /install/bin/llama-server /usr/local/bin/ +COPY --from=llama-build /install/bin/llama-cli /usr/local/bin/ +COPY --from=llama-build /install/lib/ /usr/local/lib/ +COPY --from=llama-build /install/examples/ /opt/llama.cpp/examples/ + +COPY --from=whisper-build /install/bin/whisper-server /usr/local/bin/ +COPY --from=whisper-build /install/bin/whisper-cli /usr/local/bin/ +COPY --from=whisper-build /install/lib/ /usr/local/lib/ + +COPY --from=sd-build /install/bin/sd-server /usr/local/bin/ +COPY 
--from=sd-build /install/bin/sd-cli /usr/local/bin/ +COPY --from=sd-build /install/lib/ /usr/local/lib/ + +# Update library cache +RUN ldconfig + +# Create symlinks for common command names +RUN ln -sf /usr/local/bin/llama-cli /usr/local/bin/llama && \ + ln -sf /usr/local/bin/whisper-cli /usr/local/bin/whisper && \ + ln -sf /usr/local/bin/sd-cli /usr/local/bin/stable-diffusion + +# Verify installation +RUN echo "=== Installed Binaries ===" && \ + ls -la /usr/local/bin/llama* /usr/local/bin/whisper* /usr/local/bin/sd* /usr/local/bin/llama-swap 2>/dev/null || true && \ + echo "" && \ + echo "=== Library Check ===" && \ + ldconfig -p | grep -E "(ggml|llama|whisper|sd)" || true && \ + echo "" && \ + echo "=== llama-swap Version ===" && \ + llama-swap --version 2>/dev/null || true && \ + echo "" && \ + echo "=== llama-cli Version ===" && \ + llama-cli --version 2>/dev/null || true + +# Write version information to /versions.txt +RUN echo "llama.cpp: ${LLAMA_COMMIT_HASH}" > /versions.txt && \ + echo "whisper.cpp: ${WHISPER_COMMIT_HASH}" >> /versions.txt && \ + echo "stable-diffusion.cpp: ${SD_COMMIT_HASH}" >> /versions.txt && \ + echo "llama-swap: ${LLAMA_SWAP_VERSION}" >> /versions.txt && \ + echo "backend: CUDA" >> /versions.txt && \ + echo "build_timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> /versions.txt + +WORKDIR /models +CMD ["bash"] diff --git a/docker/Dockerfile.vulkan b/docker/Dockerfile.vulkan new file mode 100644 index 00000000..1df1de5c --- /dev/null +++ b/docker/Dockerfile.vulkan @@ -0,0 +1,241 @@ +# Multi-stage Dockerfile for Vulkan GPU-accelerated AI inference tools +# Includes: llama.cpp, whisper.cpp, stable-diffusion.cpp +# Supports: AMD GPUs and other Vulkan-compatible hardware +# +# Build with BuildKit for cache mounts: +# DOCKER_BUILDKIT=1 docker build -t llama-swap:vulkan . 
+# +# Features: +# - Separate git clone stages for independent caching +# - ccache support for C++ compilation caching +# - BuildKit cache mounts for cmake incremental builds +# - Independent project rebuilds +# - No architecture targeting needed (Vulkan is portable) + +# ============================================================================ +# Stage 1: Base Builder with Common Dependencies +# ============================================================================ +FROM ubuntu:22.04 AS builder-base + +ENV DEBIAN_FRONTEND=noninteractive +ENV CCACHE_DIR=/ccache +ENV CCACHE_MAXSIZE=2G +ENV VULKAN_SDK=/opt/vulkan-sdk + +# Install build dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + cmake \ + git \ + python3 \ + python3-pip \ + libssl-dev \ + curl \ + ca-certificates \ + ccache \ + wget \ + xz-utils \ + && rm -rf /var/lib/apt/lists/* + +# Install newer Vulkan SDK (1.3.275.0) - needed for whisper.cpp +RUN wget -q --show-progress \ + https://sdk.lunarg.com/sdk/download/1.3.275.0/linux/vulkansdk-linux-x86_64-1.3.275.0.tar.xz \ + -O /tmp/vulkan-sdk.tar.xz && \ + mkdir -p /opt && \ + tar -xf /tmp/vulkan-sdk.tar.xz -C /opt && \ + rm /tmp/vulkan-sdk.tar.xz + +# Set up Vulkan environment (SDK extracts to /opt/1.3.275.0/x86_64) +ENV VULKAN_SDK=/opt/1.3.275.0/x86_64 +ENV PATH="${VULKAN_SDK}/bin:${PATH}" +ENV CMAKE_PREFIX_PATH="${VULKAN_SDK}" +ENV VULKAN_INCLUDE_DIRS="${VULKAN_SDK}/include" + +# Create ccache symlinks for compiler caching +RUN mkdir -p /usr/lib/ccache && \ + ln -sf /usr/bin/ccache /usr/lib/ccache/gcc && \ + ln -sf /usr/bin/ccache /usr/lib/ccache/g++ && \ + ln -sf /usr/bin/ccache /usr/lib/ccache/cc && \ + ln -sf /usr/bin/ccache /usr/lib/ccache/c++ +ENV PATH="/usr/lib/ccache:${PATH}" + +WORKDIR /src + +# ============================================================================ +# Stage 2: Source Cloning (Cached Independently) +# 
============================================================================ + +FROM builder-base AS llama-source +ARG LLAMA_COMMIT_HASH=master +RUN git clone --filter=blob:none --no-checkout https://github.com/ggml-org/llama.cpp.git /src/llama.cpp && \ + cd /src/llama.cpp && \ + git fetch --depth=1 origin ${LLAMA_COMMIT_HASH} && \ + git checkout FETCH_HEAD + +FROM builder-base AS whisper-source +ARG WHISPER_COMMIT_HASH=master +RUN git clone --filter=blob:none --no-checkout https://github.com/ggml-org/whisper.cpp.git /src/whisper.cpp && \ + cd /src/whisper.cpp && \ + git fetch --depth=1 origin ${WHISPER_COMMIT_HASH} && \ + git checkout FETCH_HEAD + +FROM builder-base AS sd-source +ARG SD_COMMIT_HASH=master +RUN git clone --filter=blob:none --no-checkout https://github.com/leejet/stable-diffusion.cpp.git /src/stable-diffusion.cpp && \ + cd /src/stable-diffusion.cpp && \ + git fetch --depth=1 origin ${SD_COMMIT_HASH} && \ + git checkout FETCH_HEAD && \ + git submodule update --init --recursive --depth=1 + +# ============================================================================ +# Stage 3: Individual Project Builds with Cache Mounts +# ============================================================================ + +# Build whisper.cpp (first - quick to build) +FROM builder-base AS whisper-build +COPY --from=whisper-source /src/whisper.cpp /build/whisper.cpp +WORKDIR /build/whisper.cpp +RUN --mount=type=cache,target=/ccache \ + --mount=type=cache,target=/build/whisper.cpp/build \ + rm -rf build/CMakeCache.txt build/CMakeFiles 2>/dev/null || true && \ + cmake -B build \ + -DGGML_NATIVE=OFF \ + -DGGML_VULKAN=ON \ + -DVulkan_INCLUDE_DIR="${VULKAN_SDK}/include" \ + -DVulkan_LIBRARY="${VULKAN_SDK}/lib/libvulkan.so" \ + -DCMAKE_BUILD_TYPE=Release && \ + cmake --build build --config Release -j$(nproc) --target whisper-cli whisper-server && \ + mkdir -p /install/bin /install/lib && \ + cp -v build/bin/whisper-cli build/bin/whisper-server /install/bin/ && \ + find build 
-name "*.so*" -type f -exec cp -v {} /install/lib/ \; + +# Build stable-diffusion.cpp (second - medium build time) +FROM builder-base AS sd-build +COPY --from=sd-source /src/stable-diffusion.cpp /build/stable-diffusion.cpp +WORKDIR /build/stable-diffusion.cpp +RUN --mount=type=cache,target=/ccache \ + --mount=type=cache,target=/build/stable-diffusion.cpp/build \ + set -e && \ + rm -rf build/CMakeCache.txt build/CMakeFiles 2>/dev/null || true && \ + cmake -B build \ + -DSD_VULKAN=ON \ + -DGGML_VULKAN=ON \ + -DSD_BUILD_EXAMPLES=ON \ + -DGGML_NATIVE=OFF \ + -DVulkan_INCLUDE_DIR="${VULKAN_SDK}/include" \ + -DVulkan_LIBRARY="${VULKAN_SDK}/lib/libvulkan.so" \ + -DCMAKE_BUILD_TYPE=Release && \ + cmake --build build --config Release -j$(nproc) --target sd-cli sd-server && \ + mkdir -p /install/bin /install/lib && \ + cp -v build/bin/sd-cli build/bin/sd-server /install/bin/ && \ + find build -name "*.so*" -type f -exec cp -v {} /install/lib/ \; + +# Build llama.cpp (last - longest build time) +FROM builder-base AS llama-build +COPY --from=llama-source /src/llama.cpp /build/llama.cpp +WORKDIR /build/llama.cpp +RUN --mount=type=cache,target=/ccache \ + --mount=type=cache,target=/build/llama.cpp/build \ + rm -rf build/CMakeCache.txt build/CMakeFiles 2>/dev/null || true && \ + cmake -B build \ + -DGGML_NATIVE=OFF \ + -DGGML_VULKAN=ON \ + -DGGML_BACKEND_DL=ON \ + -DVulkan_INCLUDE_DIR="${VULKAN_SDK}/include" \ + -DVulkan_LIBRARY="${VULKAN_SDK}/lib/libvulkan.so" \ + -DLLAMA_BUILD_TESTS=OFF \ + -DCMAKE_BUILD_TYPE=Release && \ + cmake --build build --config Release -j$(nproc) --target llama-cli llama-server && \ + # Copy outputs to install directory for easy extraction + mkdir -p /install/bin /install/lib /install/examples && \ + cp -v build/bin/llama-cli build/bin/llama-server /install/bin/ 2>/dev/null || true && \ + find build -name "*.so*" -type f -exec cp -v {} /install/lib/ \; && \ + cp -vr examples/* /install/examples/ 2>/dev/null || true + +# 
============================================================================ +# Stage 4: Runtime Stage +# ============================================================================ +FROM ubuntu:22.04 AS runtime + +ARG LLAMA_SWAP_VERSION=v198 +ARG LLAMA_COMMIT_HASH=unknown +ARG WHISPER_COMMIT_HASH=unknown +ARG SD_COMMIT_HASH=unknown +ENV DEBIAN_FRONTEND=noninteractive +ENV PATH="/usr/local/bin:${PATH}" +# Allow the Vulkan loader to find the Mesa ICDs +ENV VK_DRIVER_FILES=/usr/share/vulkan/icd.d/lvp_icd.x86_64.json:/usr/share/vulkan/icd.d/radeon_icd.x86_64.json:/usr/share/vulkan/icd.d/intel_icd.x86_64.json + +# Install runtime dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + libgomp1 \ + libvulkan1 \ + mesa-vulkan-drivers \ + vulkan-tools \ + python3 \ + python3-pip \ + curl \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +# Download and install llama-swap binary +RUN LLAMA_SWAP_VER_NUM=$(echo "${LLAMA_SWAP_VERSION}" | sed 's/^v//') && \ + curl -L -o /tmp/llama-swap.tar.gz \ + "https://github.com/mostlygeek/llama-swap/releases/download/${LLAMA_SWAP_VERSION}/llama-swap_${LLAMA_SWAP_VER_NUM}_linux_amd64.tar.gz" && \ + tar -xzf /tmp/llama-swap.tar.gz -C /usr/local/bin/ && \ + rm /tmp/llama-swap.tar.gz && \ + chmod +x /usr/local/bin/llama-swap + +RUN pip3 install --no-cache-dir numpy sentencepiece + +WORKDIR /app + +# Copy specific binaries from each build stage +# Copy only specific binaries from each build stage +COPY --from=llama-build /install/bin/llama-server /usr/local/bin/ +COPY --from=llama-build /install/bin/llama-cli /usr/local/bin/ +COPY --from=llama-build /install/lib/ /usr/local/lib/ +COPY --from=llama-build /install/examples/ /opt/llama.cpp/examples/ + +COPY --from=whisper-build /install/bin/whisper-server /usr/local/bin/ +COPY --from=whisper-build /install/bin/whisper-cli /usr/local/bin/ +COPY --from=whisper-build /install/lib/ /usr/local/lib/ + +COPY --from=sd-build /install/bin/sd-server /usr/local/bin/ +COPY 
--from=sd-build /install/bin/sd-cli /usr/local/bin/ +COPY --from=sd-build /install/lib/ /usr/local/lib/ + +# Update library cache +RUN ldconfig + +# Create symlinks for common command names +RUN ln -sf /usr/local/bin/llama-cli /usr/local/bin/llama && \ + ln -sf /usr/local/bin/whisper-cli /usr/local/bin/whisper && \ + ln -sf /usr/local/bin/sd-cli /usr/local/bin/stable-diffusion + +# Verify installation +RUN echo "=== Installed Binaries ===" && \ + ls -la /usr/local/bin/llama* /usr/local/bin/whisper* /usr/local/bin/sd* /usr/local/bin/llama-swap 2>/dev/null || true && \ + echo "" && \ + echo "=== Library Check ===" && \ + ldconfig -p | grep -E "(ggml|llama|whisper|sd)" || true && \ + echo "" && \ + echo "=== Vulkan Support Check ===" && \ + ls -la /usr/share/vulkan/icd.d/ 2>/dev/null || true && \ + echo "" && \ + echo "=== llama-swap Version ===" && \ + llama-swap --version 2>/dev/null || true && \ + echo "" && \ + echo "=== llama-cli Version ===" && \ + llama-cli --version 2>/dev/null || true + +# Write version information to /versions.txt +RUN echo "llama.cpp: ${LLAMA_COMMIT_HASH}" > /versions.txt && \ + echo "whisper.cpp: ${WHISPER_COMMIT_HASH}" >> /versions.txt && \ + echo "stable-diffusion.cpp: ${SD_COMMIT_HASH}" >> /versions.txt && \ + echo "llama-swap: ${LLAMA_SWAP_VERSION}" >> /versions.txt && \ + echo "backend: Vulkan" >> /versions.txt && \ + echo "build_timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> /versions.txt + +WORKDIR /models +CMD ["bash"] diff --git a/docker/build-image.sh b/docker/build-image.sh new file mode 100755 index 00000000..d1f420fe --- /dev/null +++ b/docker/build-image.sh @@ -0,0 +1,300 @@ +#!/bin/bash +# +# Build script for llama-swap-docker with commit hash pinning +# +# Usage: +# ./build-image.sh --cuda # Build CUDA image +# ./build-image.sh --vulkan # Build Vulkan image +# ./build-image.sh --cuda --no-cache # Build CUDA image without cache +# LLAMA_COMMIT_HASH=abc123 ./build-image.sh --cuda # Override llama.cpp commit +# 
WHISPER_COMMIT_HASH=def456 ./build-image.sh --vulkan # Override whisper.cpp commit +# SD_COMMIT_HASH=ghi789 ./build-image.sh --cuda # Override stable-diffusion.cpp commit +# LLAMA_SWAP_VERSION=v198 ./build-image.sh --cuda # Override llama-swap version +# +# Features: +# - Auto-detects latest commit hashes from git repos +# - Auto-detects latest llama-swap release +# - Allows environment variable overrides for reproducible builds +# - Cache-friendly: changing commit hash busts cache appropriately +# - Supports both CUDA and Vulkan backends (requires explicit flag) +# + +set -euo pipefail + +# Parse command line arguments +BACKEND="" +NO_CACHE=false + +if [[ $# -eq 0 ]]; then + echo "Error: No backend specified. Please use --cuda or --vulkan." + echo "" + echo "Usage: ./build-image.sh --cuda|--vulkan [--no-cache]" + echo "" + echo "Options:" + echo " --cuda Build CUDA image (NVIDIA GPUs)" + echo " --vulkan Build Vulkan image (AMD GPUs and compatible hardware)" + echo " --no-cache Force rebuild without using Docker cache" + echo " --help, -h Show this help message" + echo "" + echo "Environment variables:" + echo " DOCKER_IMAGE_TAG Set custom image tag (default: llama-swap:latest or llama-swap:vulkan)" + echo " LLAMA_COMMIT_HASH Override llama.cpp commit hash" + echo " WHISPER_COMMIT_HASH Override whisper.cpp commit hash" + echo " SD_COMMIT_HASH Override stable-diffusion.cpp commit hash" + echo " LLAMA_SWAP_VERSION Override llama-swap version (default: latest, e.g., v198)" + exit 1 +fi + +for arg in "$@"; do + case $arg in + --cuda) + BACKEND="cuda" + ;; + --vulkan) + BACKEND="vulkan" + ;; + --no-cache) + NO_CACHE=true + ;; + --help|-h) + echo "Usage: ./build-image.sh --cuda|--vulkan [--no-cache]" + echo "" + echo "Options:" + echo " --cuda Build CUDA image (NVIDIA GPUs)" + echo " --vulkan Build Vulkan image (AMD GPUs and compatible hardware)" + echo " --no-cache Force rebuild without using Docker cache" + echo " --help, -h Show this help message" + echo "" + echo 
"Environment variables:" + echo " DOCKER_IMAGE_TAG Set custom image tag (default: llama-swap:latest or llama-swap:vulkan)" + echo " LLAMA_COMMIT_HASH Override llama.cpp commit hash" + echo " WHISPER_COMMIT_HASH Override whisper.cpp commit hash" + echo " SD_COMMIT_HASH Override stable-diffusion.cpp commit hash" + echo " LLAMA_SWAP_VERSION Override llama-swap version (default: latest, e.g., v198)" + exit 0 + ;; + esac +done + +# Validate backend selection +if [[ -z "$BACKEND" ]]; then + echo "Error: No backend specified. Please use --cuda or --vulkan." + exit 1 +fi + +# Configuration +if [[ -n "${DOCKER_IMAGE_TAG:-}" ]]; then + # User provided a custom tag, use it as-is + : +elif [[ "$BACKEND" == "vulkan" ]]; then + DOCKER_IMAGE_TAG="llama-swap:vulkan" +else + DOCKER_IMAGE_TAG="llama-swap:cuda" +fi +DOCKER_BUILDKIT="${DOCKER_BUILDKIT:-1}" + +# Set Dockerfile based on backend +if [[ "$BACKEND" == "vulkan" ]]; then + DOCKERFILE="Dockerfile.vulkan" + echo "Building for: Vulkan (AMD GPUs and compatible hardware)" +else + DOCKERFILE="Dockerfile.cuda" + echo "Building for: CUDA (NVIDIA GPUs)" +fi + +# Git repository URLs +LLAMA_REPO="https://github.com/ggml-org/llama.cpp.git" +WHISPER_REPO="https://github.com/ggml-org/whisper.cpp.git" +SD_REPO="https://github.com/leejet/stable-diffusion.cpp.git" +LLAMA_SWAP_REPO="mostlygeek/llama-swap" + +# Function to get the latest llama-swap release version +get_latest_llama_swap_version() { + curl -s "https://api.github.com/repos/${LLAMA_SWAP_REPO}/releases/latest" | grep -oP '"tag_name": "\K[^"]+' || echo "" +} + +# Function to get the latest commit hash from a git repo's default branch +get_latest_commit() { + local repo_url="$1" + local branch="${2:-master}" + + # Try to get the latest commit hash for the specified branch + git ls-remote --heads "${repo_url}" "${branch}" 2>/dev/null | head -1 | cut -f1 +} + +# Function to get the default branch name (master or main) +get_default_branch() { + local repo_url="$1" + + # Check for 
master first + if git ls-remote --heads "${repo_url}" master &>/dev/null; then + echo "master" + elif git ls-remote --heads "${repo_url}" main &>/dev/null; then + echo "main" + else + echo "master" # fallback + fi +} + +echo "==========================================" +echo "llama-swap-docker Build Script" +echo "==========================================" +echo "" + +# Determine commit hashes - use env vars or auto-detect +if [[ -n "${LLAMA_COMMIT_HASH:-}" ]]; then + LLAMA_HASH="${LLAMA_COMMIT_HASH}" + echo "llama.cpp: Using provided commit hash: ${LLAMA_HASH}" +else + LLAMA_BRANCH=$(get_default_branch "${LLAMA_REPO}") + LLAMA_HASH=$(get_latest_commit "${LLAMA_REPO}" "${LLAMA_BRANCH}") + if [[ -z "${LLAMA_HASH}" ]]; then + echo "ERROR: Could not determine latest commit for llama.cpp" >&2 + exit 1 + fi + echo "llama.cpp: Auto-detected latest commit (${LLAMA_BRANCH}): ${LLAMA_HASH}" +fi + +if [[ -n "${WHISPER_COMMIT_HASH:-}" ]]; then + WHISPER_HASH="${WHISPER_COMMIT_HASH}" + echo "whisper.cpp: Using provided commit hash: ${WHISPER_HASH}" +else + WHISPER_BRANCH=$(get_default_branch "${WHISPER_REPO}") + WHISPER_HASH=$(get_latest_commit "${WHISPER_REPO}" "${WHISPER_BRANCH}") + if [[ -z "${WHISPER_HASH}" ]]; then + echo "ERROR: Could not determine latest commit for whisper.cpp" >&2 + exit 1 + fi + echo "whisper.cpp: Auto-detected latest commit (${WHISPER_BRANCH}): ${WHISPER_HASH}" +fi + +if [[ -n "${SD_COMMIT_HASH:-}" ]]; then + SD_HASH="${SD_COMMIT_HASH}" + echo "stable-diffusion.cpp: Using provided commit hash: ${SD_HASH}" +else + SD_BRANCH=$(get_default_branch "${SD_REPO}") + SD_HASH=$(get_latest_commit "${SD_REPO}" "${SD_BRANCH}") + if [[ -z "${SD_HASH}" ]]; then + echo "ERROR: Could not determine latest commit for stable-diffusion.cpp" >&2 + exit 1 + fi + echo "stable-diffusion.cpp: Auto-detected latest commit (${SD_BRANCH}): ${SD_HASH}" +fi + +# Determine llama-swap version - use env var or auto-detect +if [[ -n "${LLAMA_SWAP_VERSION:-}" ]]; then + 
LLAMA_SWAP_VER="${LLAMA_SWAP_VERSION}" + echo "llama-swap: Using provided version: ${LLAMA_SWAP_VER}" +else + LLAMA_SWAP_VER=$(get_latest_llama_swap_version) + if [[ -z "${LLAMA_SWAP_VER}" ]]; then + echo "ERROR: Could not determine latest llama-swap version" >&2 + exit 1 + fi + echo "llama-swap: Auto-detected latest version: ${LLAMA_SWAP_VER}" +fi + +echo "" +echo "==========================================" +echo "Starting Docker build..." +echo "==========================================" +echo "" + +# Build the Docker image with commit hashes and llama-swap version as build args +BUILD_ARGS=( + --build-arg "LLAMA_COMMIT_HASH=${LLAMA_HASH}" + --build-arg "WHISPER_COMMIT_HASH=${WHISPER_HASH}" + --build-arg "SD_COMMIT_HASH=${SD_HASH}" + --build-arg "LLAMA_SWAP_VERSION=${LLAMA_SWAP_VER}" + -t "${DOCKER_IMAGE_TAG}" + -f "${DOCKERFILE}" +) + +if [[ "$NO_CACHE" == true ]]; then + BUILD_ARGS+=(--no-cache) + echo "Note: Building without cache" +fi + +# Use docker buildx with a custom builder for parallelism control +# The legacy DOCKER_BUILDKIT=1 docker build doesn't respect BUILDKIT_MAX_PARALLELISM env var +# We need to use a custom builder with a buildkitd.toml config file +BUILDER_NAME="llama-swap-builder" + +# Check if our custom builder exists with the right config, create/update if needed +if ! docker buildx inspect "$BUILDER_NAME" >/dev/null 2>&1; then + echo "Creating custom buildx builder with max-parallelism=1..." + + # Create buildkitd.toml config file + cat > buildkitd.toml << 'BUILDKIT_EOF' +[worker.oci] + max-parallelism = 1 +BUILDKIT_EOF + + # Create the builder with the config + docker buildx create --name "$BUILDER_NAME" \ + --driver docker-container \ + --buildkitd-config buildkitd.toml \ + --use +else + # Switch to our builder + docker buildx use "$BUILDER_NAME" +fi + +echo "Building with sequential stages (one at a time), each using all CPU cores..." 
+echo "Using builder: $BUILDER_NAME" + +# Use docker buildx build with --load to load the image into Docker +# The --builder flag ensures we use our custom builder with max-parallelism=1 +docker buildx build --builder "$BUILDER_NAME" --load "${BUILD_ARGS[@]}" . + +echo "" +echo "==========================================" +echo "Verifying build artifacts..." +echo "==========================================" +echo "" + +# Verify all expected binaries exist in the image +MISSING_BINARIES=() + +for binary in llama-server llama-cli whisper-server whisper-cli sd-server sd-cli llama-swap; do + if ! docker run --rm "${DOCKER_IMAGE_TAG}" which "${binary}" >/dev/null 2>&1; then + MISSING_BINARIES+=("${binary}") + fi +done + +if [[ ${#MISSING_BINARIES[@]} -gt 0 ]]; then + echo "ERROR: Build succeeded but the following binaries are missing from the image:" + for binary in "${MISSING_BINARIES[@]}"; do + echo " - ${binary}" + done + echo "" + echo "This usually indicates a build stage failure. Try running with --no-cache flag:" + echo " ./build-image.sh --vulkan --no-cache" + exit 1 +fi + +echo "All expected binaries verified: llama-server, llama-cli, whisper-server, whisper-cli, sd-server, sd-cli, llama-swap" + +echo "" +echo "==========================================" +echo "Build complete!" 
+echo "==========================================" +echo "" +echo "Image tag: ${DOCKER_IMAGE_TAG}" +echo "" +echo "Built with:" +echo " llama.cpp: ${LLAMA_HASH}" +echo " whisper.cpp: ${WHISPER_HASH}" +echo " stable-diffusion.cpp: ${SD_HASH}" +echo " llama-swap: ${LLAMA_SWAP_VER}" +echo "" +if [[ "$BACKEND" == "vulkan" ]]; then + echo "Run with:" + echo " docker run -it --rm --device /dev/dri:/dev/dri ${DOCKER_IMAGE_TAG}" + echo "" + echo "Note: For AMD GPUs, you may also need to mount render devices:" + echo " docker run -it --rm --device /dev/dri:/dev/dri --group-add video ${DOCKER_IMAGE_TAG}" +else + echo "Run with:" + echo " docker run -it --rm --gpus all ${DOCKER_IMAGE_TAG}" +fi diff --git a/docker/test-binaries.sh b/docker/test-binaries.sh new file mode 100755 index 00000000..8fe05d2e --- /dev/null +++ b/docker/test-binaries.sh @@ -0,0 +1,120 @@ +#!/bin/bash +# Test script for verifying CUDA-enabled binaries work +# Automatically detects real NVIDIA drivers vs stub drivers + +set -e + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +# Function to print colored output +print_info() { + echo -e "${GREEN}[INFO]${NC} $1" +} + +print_warn() { + echo -e "${YELLOW}[WARN]${NC} $1" +} + +print_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +# Detect if real NVIDIA drivers are available +detect_cuda_drivers() { + local real_driver_paths=( + "/lib/x86_64-linux-gnu/libcuda.so.1" + "/usr/lib/x86_64-linux-gnu/libcuda.so.1" + "/usr/local/cuda/lib64/libcuda.so.1" + ) + + for path in "${real_driver_paths[@]}"; do + if [ -f "$path" ]; then + print_info "Real NVIDIA drivers found at: $path" + return 0 + fi + done + + return 1 +} + +# Main execution +print_info "Starting binary tests..." 
+
+# Check for real drivers
+if detect_cuda_drivers; then
+    print_info "Using real NVIDIA drivers"
+    # Prepend the real CUDA lib dir so it takes precedence over any stub paths
+    export LD_LIBRARY_PATH="/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+else
+    print_warn "No real NVIDIA drivers detected"
+    print_warn "Falling back to stub drivers for testing"
+    print_warn "GPU functionality will NOT be available"
+
+    # Add stubs to LD_LIBRARY_PATH for testing
+    export LD_LIBRARY_PATH="/usr/local/cuda/lib64/stubs:/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+    print_info "LD_LIBRARY_PATH set to: $LD_LIBRARY_PATH"
+fi
+
+# Test llama-server
+print_info "Testing llama-server..."
+if command -v llama-server &> /dev/null; then
+    if llama-server --help > /dev/null 2>&1 || llama-server -h > /dev/null 2>&1; then
+        print_info "✓ llama-server: OK"
+    else
+        print_error "✗ llama-server: Failed to run"
+        exit 1
+    fi
+else
+    print_error "✗ llama-server: Not found in PATH"
+    exit 1
+fi
+
+# Test whisper-server
+print_info "Testing whisper-server..."
+if command -v whisper-server &> /dev/null; then
+    if whisper-server --help > /dev/null 2>&1 || whisper-server -h > /dev/null 2>&1; then
+        print_info "✓ whisper-server: OK"
+    else
+        print_error "✗ whisper-server: Failed to run"
+        exit 1
+    fi
+else
+    print_error "✗ whisper-server: Not found in PATH"
+    exit 1
+fi
+
+# Test sd-server (stable-diffusion)
+print_info "Testing sd-server..."
+if command -v sd-server &> /dev/null; then
+    if sd-server --help > /dev/null 2>&1 || sd-server -h > /dev/null 2>&1; then
+        print_info "✓ sd-server: OK"
+    else
+        print_error "✗ sd-server: Failed to run"
+        exit 1
+    fi
+else
+    print_error "✗ sd-server: Not found in PATH"
+    exit 1
+fi
+
+print_info "All binary tests passed!"
+ +# Additional info about environment +print_info "Environment information:" +echo " LD_LIBRARY_PATH: $LD_LIBRARY_PATH" +echo " CUDA_VISIBLE_DEVICES: ${CUDA_VISIBLE_DEVICES:-not set}" + +# Check if nvidia-smi is available +if command -v nvidia-smi &> /dev/null; then + print_info "nvidia-smi output:" + nvidia-smi --query-gpu=name,driver_version,memory.total --format=csv,noheader 2>/dev/null || \ + print_warn "nvidia-smi found but could not query GPU information" +else + print_warn "nvidia-smi not available (expected on CPU-only hosts)" +fi + +exit 0 From c5eb79888256c11721fa282ae604240c841d79c5 Mon Sep 17 00:00:00 2001 From: Benson Wong Date: Tue, 17 Mar 2026 21:13:26 +0000 Subject: [PATCH 2/4] docker: build llama-swap from source in containers Build llama-swap binary from local source code instead of downloading from GitHub releases. This ensures the container uses the exact code in the repository. - Add golang:1.25-alpine builder stage to compile llama-swap - Generate version from git hash with +dirty suffix for unstaged changes - Update build-image.sh to use repository root as build context - Remove LLAMA_SWAP_VERSION environment variable and related code - Add test-binaries.sh to Dockerfile.vulkan for consistency Both CUDA and Vulkan Dockerfiles now build llama-swap from source. 
--- docker/Dockerfile.cuda | 42 +++++++++++++++++++++++++++++--------- docker/Dockerfile.vulkan | 44 ++++++++++++++++++++++++++++++++-------- docker/build-image.sh | 41 ++++++++++--------------------------- 3 files changed, 78 insertions(+), 49 deletions(-) diff --git a/docker/Dockerfile.cuda b/docker/Dockerfile.cuda index bebcb771..9198af65 100644 --- a/docker/Dockerfile.cuda +++ b/docker/Dockerfile.cuda @@ -18,6 +18,32 @@ # - BuildKit cache mounts for cmake incremental builds # - Independent project rebuilds +# ============================================================================ +# Stage 0: Build llama-swap from local source +# ============================================================================ +FROM golang:1.25-alpine AS llama-swap-builder +WORKDIR /app + +# Copy Go module files first for layer caching +COPY go.mod go.sum ./ +RUN go mod download + +# Copy source code and build +COPY . . +ENV CGO_ENABLED=0 GOOS=linux GOARCH=amd64 +RUN go build -o llama-swap . + +# Get version info: git hash + dirty flag, or 'local-build' fallback +RUN if git rev-parse --git-dir > /dev/null 2>&1; then \ + VERSION=$(git rev-parse --short HEAD) && \ + if [ -n "$(git status --porcelain)" ]; then \ + VERSION="${VERSION}+dirty"; \ + fi && \ + echo "$VERSION" > /app/llama-swap-version; \ + else \ + echo "local-build" > /app/llama-swap-version; \ + fi + # ============================================================================ # Stage 1: Base Builder with Common Dependencies # ============================================================================ @@ -150,7 +176,6 @@ RUN --mount=type=cache,target=/ccache \ # ============================================================================ FROM nvidia/cuda:12.4.0-runtime-ubuntu22.04 AS runtime -ARG LLAMA_SWAP_VERSION=v198 ARG LLAMA_COMMIT_HASH=unknown ARG WHISPER_COMMIT_HASH=unknown ARG SD_COMMIT_HASH=unknown @@ -164,15 +189,12 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ python3-pip \ curl \ 
ca-certificates \ + git \ && rm -rf /var/lib/apt/lists/* -# Download and install llama-swap binary -RUN LLAMA_SWAP_VER_NUM=$(echo "${LLAMA_SWAP_VERSION}" | sed 's/^v//') && \ - curl -L -o /tmp/llama-swap.tar.gz \ - "https://github.com/mostlygeek/llama-swap/releases/download/${LLAMA_SWAP_VERSION}/llama-swap_${LLAMA_SWAP_VER_NUM}_linux_amd64.tar.gz" && \ - tar -xzf /tmp/llama-swap.tar.gz -C /usr/local/bin/ && \ - rm /tmp/llama-swap.tar.gz && \ - chmod +x /usr/local/bin/llama-swap +# Copy llama-swap binary from builder stage +COPY --from=llama-swap-builder /app/llama-swap /usr/local/bin/ +COPY --from=llama-swap-builder /app/llama-swap-version /tmp/ # Copy libcuda stubs for CPU-only testing (not in ldconfig path to avoid conflicts with real drivers) # Real NVIDIA drivers will be used when available; stubs are fallback for testing on CPU-only hosts @@ -180,7 +202,7 @@ COPY --from=builder-base /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/ COPY --from=builder-base /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 # Copy test script for verifying binaries work correctly -COPY test-binaries.sh /usr/local/bin/ +COPY docker/test-binaries.sh /usr/local/bin/ RUN chmod +x /usr/local/bin/test-binaries.sh RUN pip3 install --no-cache-dir numpy sentencepiece @@ -227,7 +249,7 @@ RUN echo "=== Installed Binaries ===" && \ RUN echo "llama.cpp: ${LLAMA_COMMIT_HASH}" > /versions.txt && \ echo "whisper.cpp: ${WHISPER_COMMIT_HASH}" >> /versions.txt && \ echo "stable-diffusion.cpp: ${SD_COMMIT_HASH}" >> /versions.txt && \ - echo "llama-swap: ${LLAMA_SWAP_VERSION}" >> /versions.txt && \ + echo "llama-swap: $(cat /tmp/llama-swap-version)" >> /versions.txt && \ echo "backend: CUDA" >> /versions.txt && \ echo "build_timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> /versions.txt diff --git a/docker/Dockerfile.vulkan b/docker/Dockerfile.vulkan index 1df1de5c..53b3d7dd 100644 --- a/docker/Dockerfile.vulkan +++ b/docker/Dockerfile.vulkan @@ -12,6 +12,32 @@ # - 
Independent project rebuilds # - No architecture targeting needed (Vulkan is portable) +# ============================================================================ +# Stage 0: Build llama-swap from local source +# ============================================================================ +FROM golang:1.25-alpine AS llama-swap-builder +WORKDIR /app + +# Copy Go module files first for layer caching +COPY go.mod go.sum ./ +RUN go mod download + +# Copy source code and build +COPY . . +ENV CGO_ENABLED=0 GOOS=linux GOARCH=amd64 +RUN go build -o llama-swap . + +# Get version info: git hash + dirty flag, or 'local-build' fallback +RUN if git rev-parse --git-dir > /dev/null 2>&1; then \ + VERSION=$(git rev-parse --short HEAD) && \ + if [ -n "$(git status --porcelain)" ]; then \ + VERSION="${VERSION}+dirty"; \ + fi && \ + echo "$VERSION" > /app/llama-swap-version; \ + else \ + echo "local-build" > /app/llama-swap-version; \ + fi + # ============================================================================ # Stage 1: Base Builder with Common Dependencies # ============================================================================ @@ -157,7 +183,6 @@ RUN --mount=type=cache,target=/ccache \ # ============================================================================ FROM ubuntu:22.04 AS runtime -ARG LLAMA_SWAP_VERSION=v198 ARG LLAMA_COMMIT_HASH=unknown ARG WHISPER_COMMIT_HASH=unknown ARG SD_COMMIT_HASH=unknown @@ -176,15 +201,16 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ python3-pip \ curl \ ca-certificates \ + git \ && rm -rf /var/lib/apt/lists/* -# Download and install llama-swap binary -RUN LLAMA_SWAP_VER_NUM=$(echo "${LLAMA_SWAP_VERSION}" | sed 's/^v//') && \ - curl -L -o /tmp/llama-swap.tar.gz \ - "https://github.com/mostlygeek/llama-swap/releases/download/${LLAMA_SWAP_VERSION}/llama-swap_${LLAMA_SWAP_VER_NUM}_linux_amd64.tar.gz" && \ - tar -xzf /tmp/llama-swap.tar.gz -C /usr/local/bin/ && \ - rm /tmp/llama-swap.tar.gz && \ - chmod 
+x /usr/local/bin/llama-swap +# Copy llama-swap binary from builder stage +COPY --from=llama-swap-builder /app/llama-swap /usr/local/bin/ +COPY --from=llama-swap-builder /app/llama-swap-version /tmp/ + +# Copy test script for verifying binaries work correctly +COPY docker/test-binaries.sh /usr/local/bin/ +RUN chmod +x /usr/local/bin/test-binaries.sh RUN pip3 install --no-cache-dir numpy sentencepiece @@ -233,7 +259,7 @@ RUN echo "=== Installed Binaries ===" && \ RUN echo "llama.cpp: ${LLAMA_COMMIT_HASH}" > /versions.txt && \ echo "whisper.cpp: ${WHISPER_COMMIT_HASH}" >> /versions.txt && \ echo "stable-diffusion.cpp: ${SD_COMMIT_HASH}" >> /versions.txt && \ - echo "llama-swap: ${LLAMA_SWAP_VERSION}" >> /versions.txt && \ + echo "llama-swap: $(cat /tmp/llama-swap-version)" >> /versions.txt && \ echo "backend: Vulkan" >> /versions.txt && \ echo "build_timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> /versions.txt diff --git a/docker/build-image.sh b/docker/build-image.sh index d1f420fe..fa4d50c1 100755 --- a/docker/build-image.sh +++ b/docker/build-image.sh @@ -9,11 +9,10 @@ # LLAMA_COMMIT_HASH=abc123 ./build-image.sh --cuda # Override llama.cpp commit # WHISPER_COMMIT_HASH=def456 ./build-image.sh --vulkan # Override whisper.cpp commit # SD_COMMIT_HASH=ghi789 ./build-image.sh --cuda # Override stable-diffusion.cpp commit -# LLAMA_SWAP_VERSION=v198 ./build-image.sh --cuda # Override llama-swap version # # Features: # - Auto-detects latest commit hashes from git repos -# - Auto-detects latest llama-swap release +# - Builds llama-swap from local source code # - Allows environment variable overrides for reproducible builds # - Cache-friendly: changing commit hash busts cache appropriately # - Supports both CUDA and Vulkan backends (requires explicit flag) @@ -37,11 +36,10 @@ if [[ $# -eq 0 ]]; then echo " --help, -h Show this help message" echo "" echo "Environment variables:" - echo " DOCKER_IMAGE_TAG Set custom image tag (default: llama-swap:latest or llama-swap:vulkan)" + 
echo " DOCKER_IMAGE_TAG Set custom image tag (default: llama-swap:cuda or llama-swap:vulkan)" echo " LLAMA_COMMIT_HASH Override llama.cpp commit hash" echo " WHISPER_COMMIT_HASH Override whisper.cpp commit hash" echo " SD_COMMIT_HASH Override stable-diffusion.cpp commit hash" - echo " LLAMA_SWAP_VERSION Override llama-swap version (default: latest, e.g., v198)" exit 1 fi @@ -66,11 +64,10 @@ for arg in "$@"; do echo " --help, -h Show this help message" echo "" echo "Environment variables:" - echo " DOCKER_IMAGE_TAG Set custom image tag (default: llama-swap:latest or llama-swap:vulkan)" + echo " DOCKER_IMAGE_TAG Set custom image tag (default: llama-swap:cuda or llama-swap:vulkan)" echo " LLAMA_COMMIT_HASH Override llama.cpp commit hash" echo " WHISPER_COMMIT_HASH Override whisper.cpp commit hash" echo " SD_COMMIT_HASH Override stable-diffusion.cpp commit hash" - echo " LLAMA_SWAP_VERSION Override llama-swap version (default: latest, e.g., v198)" exit 0 ;; esac @@ -106,12 +103,6 @@ fi LLAMA_REPO="https://github.com/ggml-org/llama.cpp.git" WHISPER_REPO="https://github.com/ggml-org/whisper.cpp.git" SD_REPO="https://github.com/leejet/stable-diffusion.cpp.git" -LLAMA_SWAP_REPO="mostlygeek/llama-swap" - -# Function to get the latest llama-swap release version -get_latest_llama_swap_version() { - curl -s "https://api.github.com/repos/${LLAMA_SWAP_REPO}/releases/latest" | grep -oP '"tag_name": "\K[^"]+' || echo "" -} # Function to get the latest commit hash from a git repo's default branch get_latest_commit() { @@ -181,33 +172,22 @@ else echo "stable-diffusion.cpp: Auto-detected latest commit (${SD_BRANCH}): ${SD_HASH}" fi -# Determine llama-swap version - use env var or auto-detect -if [[ -n "${LLAMA_SWAP_VERSION:-}" ]]; then - LLAMA_SWAP_VER="${LLAMA_SWAP_VERSION}" - echo "llama-swap: Using provided version: ${LLAMA_SWAP_VER}" -else - LLAMA_SWAP_VER=$(get_latest_llama_swap_version) - if [[ -z "${LLAMA_SWAP_VER}" ]]; then - echo "ERROR: Could not determine latest llama-swap 
version" >&2 - exit 1 - fi - echo "llama-swap: Auto-detected latest version: ${LLAMA_SWAP_VER}" -fi - echo "" echo "==========================================" echo "Starting Docker build..." echo "==========================================" echo "" -# Build the Docker image with commit hashes and llama-swap version as build args +# Build the Docker image with commit hashes as build args +# Build context is the repository root (..) so the Dockerfile can access Go source +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" BUILD_ARGS=( --build-arg "LLAMA_COMMIT_HASH=${LLAMA_HASH}" --build-arg "WHISPER_COMMIT_HASH=${WHISPER_HASH}" --build-arg "SD_COMMIT_HASH=${SD_HASH}" - --build-arg "LLAMA_SWAP_VERSION=${LLAMA_SWAP_VER}" -t "${DOCKER_IMAGE_TAG}" - -f "${DOCKERFILE}" + -f "${SCRIPT_DIR}/${DOCKERFILE}" ) if [[ "$NO_CACHE" == true ]]; then @@ -245,7 +225,8 @@ echo "Using builder: $BUILDER_NAME" # Use docker buildx build with --load to load the image into Docker # The --builder flag ensures we use our custom builder with max-parallelism=1 -docker buildx build --builder "$BUILDER_NAME" --load "${BUILD_ARGS[@]}" . 
+# Build context is the repository root so we can access Go source files +docker buildx build --builder "$BUILDER_NAME" --load "${BUILD_ARGS[@]}" "${REPO_ROOT}" echo "" echo "==========================================" @@ -286,7 +267,7 @@ echo "Built with:" echo " llama.cpp: ${LLAMA_HASH}" echo " whisper.cpp: ${WHISPER_HASH}" echo " stable-diffusion.cpp: ${SD_HASH}" -echo " llama-swap: ${LLAMA_SWAP_VER}" +echo " llama-swap: $(docker run --rm "${DOCKER_IMAGE_TAG}" cat /versions.txt | grep llama-swap | cut -d' ' -f2-)" echo "" if [[ "$BACKEND" == "vulkan" ]]; then echo "Run with:" From c6a93d8a12eb838d48312359d676334eab7f2b7f Mon Sep 17 00:00:00 2001 From: Benson Wong Date: Tue, 17 Mar 2026 22:31:06 +0000 Subject: [PATCH 3/4] fix llama-swap build in Dockerfile.cuda --- docker/Dockerfile.cuda | 94 ++++++++++++++++------------------------ docker/Dockerfile.vulkan | 7 ++- 2 files changed, 41 insertions(+), 60 deletions(-) diff --git a/docker/Dockerfile.cuda b/docker/Dockerfile.cuda index 9198af65..69ebc3f4 100644 --- a/docker/Dockerfile.cuda +++ b/docker/Dockerfile.cuda @@ -19,36 +19,12 @@ # - Independent project rebuilds # ============================================================================ -# Stage 0: Build llama-swap from local source -# ============================================================================ -FROM golang:1.25-alpine AS llama-swap-builder -WORKDIR /app - -# Copy Go module files first for layer caching -COPY go.mod go.sum ./ -RUN go mod download - -# Copy source code and build -COPY . . -ENV CGO_ENABLED=0 GOOS=linux GOARCH=amd64 -RUN go build -o llama-swap . 
- -# Get version info: git hash + dirty flag, or 'local-build' fallback -RUN if git rev-parse --git-dir > /dev/null 2>&1; then \ - VERSION=$(git rev-parse --short HEAD) && \ - if [ -n "$(git status --porcelain)" ]; then \ - VERSION="${VERSION}+dirty"; \ - fi && \ - echo "$VERSION" > /app/llama-swap-version; \ - else \ - echo "local-build" > /app/llama-swap-version; \ - fi - -# ============================================================================ -# Stage 1: Base Builder with Common Dependencies +# Stage 0: Base Builder with Common Dependencies # ============================================================================ FROM nvidia/cuda:12.4.0-devel-ubuntu22.04 AS builder-base +ARG NODE_MAJOR=24 + ENV DEBIAN_FRONTEND=noninteractive ENV CMAKE_CUDA_ARCHITECTURES="60;61;75;86;89" ENV CCACHE_DIR=/ccache @@ -65,10 +41,40 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ curl \ ca-certificates \ ccache \ + make \ + gnupg \ + && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key \ + | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \ + && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_${NODE_MAJOR}.x nodistro main" \ + > /etc/apt/sources.list.d/nodesource.list \ + && apt-get update && apt-get install -y nodejs \ && rm -rf /var/lib/apt/lists/* WORKDIR /src +# ============================================================================ +# Stage 1: Build llama-swap from local source +# Uses builder-base for cache efficiency (shares base layers with other builders) +# ============================================================================ +FROM builder-base AS llama-swap-builder + +# Install Go 1.25 (ubuntu22.04 default is older) +ENV GOLANG_VERSION=1.25.0 +ENV GOPATH=/go +ENV PATH=/usr/local/go/bin:$GOPATH/bin:$PATH +RUN curl -fsSL https://go.dev/dl/go${GOLANG_VERSION}.linux-amd64.tar.gz | tar -C /usr/local -xz + +WORKDIR /app + +# Copy Go module files first for layer caching +COPY 
go.mod go.sum ./ +RUN go mod download + +# Copy source code and build +COPY . . +ENV CGO_ENABLED=0 GOOS=linux GOARCH=amd64 +RUN mkdir /install && make linux && cp build/llama-swap-linux-amd64 /install/llama-swap + # ============================================================================ # Stage 2: Source Cloning (Cached Independently) # ============================================================================ @@ -166,19 +172,15 @@ RUN --mount=type=cache,target=/ccache \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache && \ cmake --build build --config Release -j$(nproc) --target llama-cli llama-server && \ # Copy outputs to install directory for easy extraction - mkdir -p /install/bin /install/lib /install/examples && \ + mkdir -p /install/bin /install/lib && \ cp -v build/bin/llama-cli build/bin/llama-server /install/bin/ 2>/dev/null || true && \ - find build -name "*.so*" -type f -exec cp -v {} /install/lib/ \; && \ - cp -vr examples/* /install/examples/ 2>/dev/null || true + find build -name "*.so*" -type f -exec cp -v {} /install/lib/ \; # ============================================================================ # Stage 4: Runtime Stage # ============================================================================ FROM nvidia/cuda:12.4.0-runtime-ubuntu22.04 AS runtime -ARG LLAMA_COMMIT_HASH=unknown -ARG WHISPER_COMMIT_HASH=unknown -ARG SD_COMMIT_HASH=unknown ENV DEBIAN_FRONTEND=noninteractive ENV PATH="/usr/local/bin:${PATH}" ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" @@ -214,7 +216,6 @@ WORKDIR /app COPY --from=llama-build /install/bin/llama-server /usr/local/bin/ COPY --from=llama-build /install/bin/llama-cli /usr/local/bin/ COPY --from=llama-build /install/lib/ /usr/local/lib/ -COPY --from=llama-build /install/examples/ /opt/llama.cpp/examples/ COPY --from=whisper-build /install/bin/whisper-server /usr/local/bin/ COPY --from=whisper-build /install/bin/whisper-cli /usr/local/bin/ @@ -227,30 +228,11 @@ COPY --from=sd-build /install/lib/ 
/usr/local/lib/ # Update library cache RUN ldconfig -# Create symlinks for common command names -RUN ln -sf /usr/local/bin/llama-cli /usr/local/bin/llama && \ - ln -sf /usr/local/bin/whisper-cli /usr/local/bin/whisper && \ - ln -sf /usr/local/bin/sd-cli /usr/local/bin/stable-diffusion - -# Verify installation -RUN echo "=== Installed Binaries ===" && \ - ls -la /usr/local/bin/llama* /usr/local/bin/whisper* /usr/local/bin/sd* /usr/local/bin/llama-swap 2>/dev/null || true && \ - echo "" && \ - echo "=== Library Check ===" && \ - ldconfig -p | grep -E "(ggml|llama|whisper|sd)" || true && \ - echo "" && \ - echo "=== llama-swap Version ===" && \ - llama-swap --version 2>/dev/null || true && \ - echo "" && \ - echo "=== llama-cli Version ===" && \ - llama-cli --version 2>/dev/null || true - # Write version information to /versions.txt -RUN echo "llama.cpp: ${LLAMA_COMMIT_HASH}" > /versions.txt && \ +RUN echo "llama.cpp: $(/usr/local/bin/llama-server --version 2>&1 | grep version) > /versions.txt && \ echo "whisper.cpp: ${WHISPER_COMMIT_HASH}" >> /versions.txt && \ - echo "stable-diffusion.cpp: ${SD_COMMIT_HASH}" >> /versions.txt && \ - echo "llama-swap: $(cat /tmp/llama-swap-version)" >> /versions.txt && \ - echo "backend: CUDA" >> /versions.txt && \ + echo "stable-diffusion.cpp: $(/usr/local/bin/sd-server --version)" >> /versions.txt && \ + echo "llama-swap: $(/usr/local/bin/llama-swap -version)" >> /versions.txt && \ echo "build_timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> /versions.txt WORKDIR /models diff --git a/docker/Dockerfile.vulkan b/docker/Dockerfile.vulkan index 53b3d7dd..6b48ae18 100644 --- a/docker/Dockerfile.vulkan +++ b/docker/Dockerfile.vulkan @@ -146,7 +146,7 @@ RUN --mount=type=cache,target=/ccache \ cmake -B build \ -DSD_VULKAN=ON \ -DGGML_VULKAN=ON \ - -DSD_BUILD_EXAMPLES=ON \ + -DSD_BUILD_EXAMPLES=OFF \ -DGGML_NATIVE=OFF \ -DVulkan_INCLUDE_DIR="${VULKAN_SDK}/include" \ -DVulkan_LIBRARY="${VULKAN_SDK}/lib/libvulkan.so" \ @@ -173,10 +173,9 @@ RUN 
--mount=type=cache,target=/ccache \ -DCMAKE_BUILD_TYPE=Release && \ cmake --build build --config Release -j$(nproc) --target llama-cli llama-server && \ # Copy outputs to install directory for easy extraction - mkdir -p /install/bin /install/lib /install/examples && \ + mkdir -p /install/bin /install/lib && \ cp -v build/bin/llama-cli build/bin/llama-server /install/bin/ 2>/dev/null || true && \ - find build -name "*.so*" -type f -exec cp -v {} /install/lib/ \; && \ - cp -vr examples/* /install/examples/ 2>/dev/null || true + find build -name "*.so*" -type f -exec cp -v {} /install/lib/ \; # ============================================================================ # Stage 4: Runtime Stage From 74c625bbd5cc2ad59c830ceb03fdc73885a35239 Mon Sep 17 00:00:00 2001 From: Benson Wong Date: Tue, 17 Mar 2026 16:58:01 -0700 Subject: [PATCH 4/4] docker,Makefile: fix cuda build --- Makefile | 2 +- docker/Dockerfile.cuda | 13 ++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index f7d18586..f0fb12d2 100644 --- a/Makefile +++ b/Makefile @@ -51,7 +51,7 @@ mac: ui linux: ui @echo "Building Linux binary..." 
GOOS=linux GOARCH=amd64 go build -ldflags="-X main.commit=${GIT_HASH} -X main.version=local_${GIT_HASH} -X main.date=${BUILD_DATE}" -o $(BUILD_DIR)/$(APP_NAME)-linux-amd64 - GOOS=linux GOARCH=arm64 go build -ldflags="-X main.commit=${GIT_HASH} -X main.version=local_${GIT_HASH} -X main.date=${BUILD_DATE}" -o $(BUILD_DIR)/$(APP_NAME)-linux-arm64 +#GOOS=linux GOARCH=arm64 go build -ldflags="-X main.commit=${GIT_HASH} -X main.version=local_${GIT_HASH} -X main.date=${BUILD_DATE}" -o $(BUILD_DIR)/$(APP_NAME)-linux-arm64 # Build Windows binary windows: ui diff --git a/docker/Dockerfile.cuda b/docker/Dockerfile.cuda index 69ebc3f4..4cd85991 100644 --- a/docker/Dockerfile.cuda +++ b/docker/Dockerfile.cuda @@ -195,8 +195,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ && rm -rf /var/lib/apt/lists/* # Copy llama-swap binary from builder stage -COPY --from=llama-swap-builder /app/llama-swap /usr/local/bin/ -COPY --from=llama-swap-builder /app/llama-swap-version /tmp/ +COPY --from=llama-swap-builder /install/llama-swap /usr/local/bin/ # Copy libcuda stubs for CPU-only testing (not in ldconfig path to avoid conflicts with real drivers) # Real NVIDIA drivers will be used when available; stubs are fallback for testing on CPU-only hosts @@ -229,11 +228,11 @@ COPY --from=sd-build /install/lib/ /usr/local/lib/ RUN ldconfig # Write version information to /versions.txt -RUN echo "llama.cpp: $(/usr/local/bin/llama-server --version 2>&1 | grep version) > /versions.txt && \ - echo "whisper.cpp: ${WHISPER_COMMIT_HASH}" >> /versions.txt && \ - echo "stable-diffusion.cpp: $(/usr/local/bin/sd-server --version)" >> /versions.txt && \ - echo "llama-swap: $(/usr/local/bin/llama-swap -version)" >> /versions.txt && \ - echo "build_timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> /versions.txt +#RUN echo "llama.cpp: $(/usr/local/bin/llama-server --version 2>&1 | grep version) > /versions.txt && \ +# echo "whisper.cpp: ${WHISPER_COMMIT_HASH}" >> /versions.txt && \ +# echo 
"stable-diffusion.cpp: $(/usr/local/bin/sd-server --version)" >> /versions.txt && \ +# echo "llama-swap: $(/usr/local/bin/llama-swap -version)" >> /versions.txt && \ +# echo "build_timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> /versions.txt WORKDIR /models CMD ["bash"]