diff --git a/Makefile b/Makefile
index f7d18586a..f0fb12d2b 100644
--- a/Makefile
+++ b/Makefile
@@ -51,7 +51,7 @@ mac: ui
 linux: ui
 	@echo "Building Linux binary..."
 	GOOS=linux GOARCH=amd64 go build -ldflags="-X main.commit=${GIT_HASH} -X main.version=local_${GIT_HASH} -X main.date=${BUILD_DATE}" -o $(BUILD_DIR)/$(APP_NAME)-linux-amd64
-	GOOS=linux GOARCH=arm64 go build -ldflags="-X main.commit=${GIT_HASH} -X main.version=local_${GIT_HASH} -X main.date=${BUILD_DATE}" -o $(BUILD_DIR)/$(APP_NAME)-linux-arm64
+#GOOS=linux GOARCH=arm64 go build -ldflags="-X main.commit=${GIT_HASH} -X main.version=local_${GIT_HASH} -X main.date=${BUILD_DATE}" -o $(BUILD_DIR)/$(APP_NAME)-linux-arm64
 
 # Build Windows binary
 windows: ui
diff --git a/docker/.gitignore b/docker/.gitignore
new file mode 100644
index 000000000..b7697f06b
--- /dev/null
+++ b/docker/.gitignore
@@ -0,0 +1,2 @@
+# BuildKit configuration file (generated by build-image.sh)
+buildkitd.toml
diff --git a/docker/AGENTS.md b/docker/AGENTS.md
new file mode 100644
index 000000000..b42c74abe
--- /dev/null
+++ b/docker/AGENTS.md
@@ -0,0 +1,24 @@
+- you are working in my VM sandbox. It is safe to use sudo.
+- use or install whatever tools you need to complete your goal
+- use `docker buildx build --build-arg BACKEND=cuda` or `--build-arg BACKEND=vulkan` with the unified `docker/Dockerfile`
+- DOCKER_BUILDKIT=1 is required for cache mounts and conditional FROM stages
+- ALWAYS send notifications to get the user's attention
+- when running `./build-image.sh`, use a 2-hour (7200000ms) timeout minimum as CUDA builds take 60-120+ minutes to compile for multiple architectures
+
+# Adding a new server project
+
+1. Add source clone stage in `docker/Dockerfile` (FROM builder-base AS `<name>`-source)
+2. Add build stage with CUDA/Vulkan conditional cmake flags (FROM builder-base AS `<name>`-build)
+3. Add COPY lines in the runtime stage for binaries and libraries
+4. Add the binary name(s) to the validation RUN step in the runtime stage
+5. Add the repo URL and commit hash to `docker/build-image.sh`
+
+# Notifications
+
+ALWAYS send notifications to keep the user informed:
+
+- When starting or finishing a job
+- For progress updates on long-running tasks (especially Docker builds)
+- For todo list progress updates (when items start/complete)
+- When you need feedback or to elicit information from the user
+- use pushover.sh, example: `pushover.sh "notification to send"`
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 000000000..9d6fe044c
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,194 @@
+# Unified multi-stage Dockerfile for GPU-accelerated AI inference tools
+# Includes: llama.cpp, whisper.cpp, stable-diffusion.cpp, llama-swap
+#
+# Usage:
+#   docker buildx build --build-arg BACKEND=cuda -t llama-swap:cuda .
+#   docker buildx build --build-arg BACKEND=vulkan -t llama-swap:vulkan .
+#
+# Adding a new server project:
+#   1. Add a case to install.sh with project-specific cmake flags and targets
+#   2. Add a source clone stage (FROM builder-base AS <name>-source)
+#   3. Add a build stage that runs: bash /build/install.sh "$BACKEND" <project>
+#   4. Add COPY lines in the runtime stage
+#   5. Add the binary name(s) to the validation RUN step
+#   6. Update build-image.sh with the new repo URL and commit hash
+
+ARG BACKEND=cuda
+
+# Builder base: CUDA devel image + Vulkan SDK (supports both backends)
+FROM nvidia/cuda:12.4.0-devel-ubuntu22.04 AS builder-base
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV CMAKE_CUDA_ARCHITECTURES="60;61;75;86;89"
+ENV CCACHE_DIR=/ccache
+ENV CCACHE_MAXSIZE=2G
+ENV PATH="/usr/lib/ccache:${PATH}"
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential cmake git python3 python3-pip libssl-dev \
+    curl ca-certificates ccache make wget xz-utils unzip \
+    && rm -rf /var/lib/apt/lists/*
+
+ARG VULKAN_SDK_VERSION=1.3.275.0
+RUN wget -q --show-progress \
+    https://sdk.lunarg.com/sdk/download/${VULKAN_SDK_VERSION}/linux/vulkansdk-linux-x86_64-${VULKAN_SDK_VERSION}.tar.xz \
+    -O /tmp/vulkan-sdk.tar.xz && \
+    mkdir -p /opt && \
+    tar -xf /tmp/vulkan-sdk.tar.xz -C /opt && \
+    rm /tmp/vulkan-sdk.tar.xz
+
+ENV VULKAN_SDK=/opt/${VULKAN_SDK_VERSION}/x86_64
+ENV PATH="${VULKAN_SDK}/bin:${PATH}"
+ENV CMAKE_PREFIX_PATH="${VULKAN_SDK}"
+ENV VULKAN_INCLUDE_DIRS="${VULKAN_SDK}/include"
+
+WORKDIR /src
+
+# Build llama-swap from local source
+FROM golang:1.25-alpine AS llama-swap-builder
+WORKDIR /app
+COPY go.mod go.sum ./
+RUN go mod download
+COPY . .
+ENV CGO_ENABLED=0 GOOS=linux GOARCH=amd64
+RUN go build -o llama-swap .
+RUN if git rev-parse --git-dir > /dev/null 2>&1; then \
+        VERSION=$(git rev-parse --short HEAD) && \
+        if [ -n "$(git status --porcelain)" ]; then \
+            VERSION="${VERSION}+dirty"; \
+        fi && \
+        echo "$VERSION" > /app/llama-swap-version; \
+    else \
+        echo "local-build" > /app/llama-swap-version; \
+    fi
+
+# Source cloning (cached independently per project)
+FROM builder-base AS llama-source
+ARG LLAMA_COMMIT_HASH=master
+RUN git clone --filter=blob:none --no-checkout https://github.com/ggml-org/llama.cpp.git /src/llama.cpp && \
+    cd /src/llama.cpp && \
+    git fetch --depth=1 origin ${LLAMA_COMMIT_HASH} && \
+    git checkout FETCH_HEAD
+
+FROM builder-base AS whisper-source
+ARG WHISPER_COMMIT_HASH=master
+RUN git clone --filter=blob:none --no-checkout https://github.com/ggml-org/whisper.cpp.git /src/whisper.cpp && \
+    cd /src/whisper.cpp && \
+    git fetch --depth=1 origin ${WHISPER_COMMIT_HASH} && \
+    git checkout FETCH_HEAD
+
+FROM builder-base AS sd-source
+ARG SD_COMMIT_HASH=master
+RUN git clone --filter=blob:none --no-checkout https://github.com/leejet/stable-diffusion.cpp.git /src/stable-diffusion.cpp && \
+    cd /src/stable-diffusion.cpp && \
+    git fetch --depth=1 origin ${SD_COMMIT_HASH} && \
+    git checkout FETCH_HEAD && \
+    git submodule update --init --recursive --depth=1
+
+# Project builds (ordered by build time: fastest first)
+FROM builder-base AS whisper-build
+ARG BACKEND=cuda
+COPY --from=whisper-source /src/whisper.cpp /build/whisper.cpp
+COPY docker/install.sh /build/
+WORKDIR /build/whisper.cpp
+RUN --mount=type=cache,id=ccache-${BACKEND},target=/ccache \
+    --mount=type=cache,id=whisper-${BACKEND},target=/build/whisper.cpp/build \
+    bash /build/install.sh "$BACKEND" whisper
+
+FROM builder-base AS sd-build
+ARG BACKEND=cuda
+ARG SD_COMMIT_HASH=master
+COPY --from=sd-source /src/stable-diffusion.cpp /build/stable-diffusion.cpp
+COPY docker/install.sh /build/
+WORKDIR /build/stable-diffusion.cpp
+RUN --mount=type=cache,id=ccache-${BACKEND},target=/ccache \
+    --mount=type=cache,id=sd-${BACKEND},target=/build/stable-diffusion.cpp/build \
+    RELEASE_TAG="${SD_COMMIT_HASH}" bash /build/install.sh "$BACKEND" sd
+
+FROM builder-base AS llama-build
+ARG BACKEND=cuda
+ARG LLAMA_COMMIT_HASH=master
+COPY --from=llama-source /src/llama.cpp /build/llama.cpp
+COPY docker/install.sh /build/
+WORKDIR /build/llama.cpp
+RUN --mount=type=cache,id=ccache-${BACKEND},target=/ccache \
+    --mount=type=cache,id=llama-${BACKEND},target=/build/llama.cpp/build \
+    RELEASE_TAG="${LLAMA_COMMIT_HASH}" bash /build/install.sh "$BACKEND" llama
+
+# CUDA runtime
+FROM nvidia/cuda:12.4.0-runtime-ubuntu22.04 AS runtime-cuda
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    libgomp1 python3 python3-pip curl ca-certificates git \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY --from=builder-base /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so
+COPY --from=builder-base /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1
+
+# Vulkan runtime
+FROM ubuntu:22.04 AS runtime-vulkan
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV VK_DRIVER_FILES=/usr/share/vulkan/icd.d/lvp_icd.x86_64.json:/usr/share/vulkan/icd.d/radeon_icd.x86_64.json:/usr/share/vulkan/icd.d/intel_icd.x86_64.json
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    libgomp1 libvulkan1 mesa-vulkan-drivers vulkan-tools \
+    python3 python3-pip curl ca-certificates git \
+    && rm -rf /var/lib/apt/lists/*
+
+# Final runtime
+FROM runtime-${BACKEND} AS runtime
+
+ARG BACKEND=cuda
+ARG LLAMA_COMMIT_HASH=unknown
+ARG WHISPER_COMMIT_HASH=unknown
+ARG SD_COMMIT_HASH=unknown
+
+ENV PATH="/usr/local/bin:${PATH}"
+
+COPY --from=llama-swap-builder /app/llama-swap /usr/local/bin/
+COPY --from=llama-swap-builder /app/llama-swap-version /tmp/
+
+COPY docker/test-binaries.sh /usr/local/bin/
+RUN chmod +x /usr/local/bin/test-binaries.sh
+
+RUN pip3 install --no-cache-dir numpy sentencepiece
+
+WORKDIR /app
+
+COPY --from=llama-build /install/bin/llama-server /usr/local/bin/
+COPY --from=llama-build /install/bin/llama-cli /usr/local/bin/
+COPY --from=llama-build /install/lib/ /usr/local/lib/
+
+COPY --from=whisper-build /install/bin/whisper-server /usr/local/bin/
+COPY --from=whisper-build /install/bin/whisper-cli /usr/local/bin/
+COPY --from=whisper-build /install/lib/ /usr/local/lib/
+
+COPY --from=sd-build /install/bin/sd-server /usr/local/bin/
+COPY --from=sd-build /install/bin/sd-cli /usr/local/bin/
+COPY --from=sd-build /install/lib/ /usr/local/lib/
+
+RUN ldconfig
+
+RUN ln -sf /usr/local/bin/llama-cli /usr/local/bin/llama && \
+    ln -sf /usr/local/bin/whisper-cli /usr/local/bin/whisper && \
+    ln -sf /usr/local/bin/sd-cli /usr/local/bin/stable-diffusion
+
+RUN set -e && \
+    for bin in llama-server llama-cli whisper-server whisper-cli sd-server sd-cli llama-swap; do \
+        test -x /usr/local/bin/$bin || { echo "FATAL: $bin missing from /usr/local/bin"; exit 1; }; \
+    done && \
+    echo "All binaries validated successfully"
+
+RUN echo "llama.cpp: ${LLAMA_COMMIT_HASH}" > /versions.txt && \
+    echo "whisper.cpp: ${WHISPER_COMMIT_HASH}" >> /versions.txt && \
+    echo "stable-diffusion.cpp: ${SD_COMMIT_HASH}" >> /versions.txt && \
+    echo "llama-swap: $(cat /tmp/llama-swap-version)" >> /versions.txt && \
+    echo "backend: ${BACKEND}" >> /versions.txt && \
+    echo "build_timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> /versions.txt
+
+WORKDIR /models
+CMD ["bash"]
diff --git a/docker/build-image.sh b/docker/build-image.sh
new file mode 100755
index 000000000..7695b6042
--- /dev/null
+++ b/docker/build-image.sh
@@ -0,0 +1,305 @@
+#!/bin/bash
+#
+# Build script for llama-swap-docker with commit hash pinning
+#
+# Usage:
+#   ./build-image.sh --cuda                              # Build CUDA image
+#   ./build-image.sh --vulkan                            # Build Vulkan image
+#   ./build-image.sh --cuda --no-cache                   # Build CUDA image without cache
+#   LLAMA_COMMIT_HASH=abc123 ./build-image.sh --cuda     # Override llama.cpp commit
+#   LLAMA_COMMIT_HASH=b8429 ./build-image.sh --vulkan    # Override llama.cpp release tag (vulkan uses prebuilt binaries)
+#   WHISPER_COMMIT_HASH=def456 ./build-image.sh --vulkan # Override whisper.cpp commit
+#   SD_COMMIT_HASH=ghi789 ./build-image.sh --cuda        # Override stable-diffusion.cpp commit
+#
+# Features:
+#   - Auto-detects latest commit hashes from git repos
+#   - Builds llama-swap from local source code
+#   - Allows environment variable overrides for reproducible builds
+#   - Cache-friendly: changing commit hash busts cache appropriately
+#   - Supports both CUDA and Vulkan backends (requires explicit flag)
+#
+
+set -euo pipefail
+
+# Parse command line arguments
+BACKEND=""
+NO_CACHE=false
+
+if [[ $# -eq 0 ]]; then
+  echo "Error: No backend specified. Please use --cuda or --vulkan."
+  echo ""
+  echo "Usage: ./build-image.sh --cuda|--vulkan [--no-cache]"
+  echo ""
+  echo "Options:"
+  echo "  --cuda       Build CUDA image (NVIDIA GPUs)"
+  echo "  --vulkan     Build Vulkan image (AMD GPUs and compatible hardware)"
+  echo "  --no-cache   Force rebuild without using Docker cache"
+  echo "  --help, -h   Show this help message"
+  echo ""
+  echo "Environment variables:"
+  echo "  DOCKER_IMAGE_TAG     Set custom image tag (default: llama-swap:cuda or llama-swap:vulkan)"
+  echo "  LLAMA_COMMIT_HASH    Override llama.cpp commit hash"
+  echo "  WHISPER_COMMIT_HASH  Override whisper.cpp commit hash"
+  echo "  SD_COMMIT_HASH       Override stable-diffusion.cpp commit hash"
+  exit 1
+fi
+
+for arg in "$@"; do
+  case $arg in
+    --cuda)
+      BACKEND="cuda"
+      ;;
+    --vulkan)
+      BACKEND="vulkan"
+      ;;
+    --no-cache)
+      NO_CACHE=true
+      ;;
+    --help|-h)
+      echo "Usage: ./build-image.sh --cuda|--vulkan [--no-cache]"
+      echo ""
+      echo "Options:"
+      echo "  --cuda       Build CUDA image (NVIDIA GPUs)"
+      echo "  --vulkan     Build Vulkan image (AMD GPUs and compatible hardware)"
+      echo "  --no-cache   Force rebuild without using Docker cache"
+      echo "  --help, -h   Show this help message"
+      echo ""
+      echo "Environment variables:"
+      echo "  DOCKER_IMAGE_TAG     Set custom image tag (default: llama-swap:cuda or llama-swap:vulkan)"
+      echo "  LLAMA_COMMIT_HASH    Override llama.cpp commit hash"
+      echo "  WHISPER_COMMIT_HASH  Override whisper.cpp commit hash"
+      echo "  SD_COMMIT_HASH       Override stable-diffusion.cpp commit hash"
+      exit 0
+      ;;
+  esac
+done
+
+# Validate backend selection
+if [[ -z "$BACKEND" ]]; then
+  echo "Error: No backend specified. Please use --cuda or --vulkan."
+  exit 1
+fi
+
+# Configuration
+if [[ -n "${DOCKER_IMAGE_TAG:-}" ]]; then
+  # User provided a custom tag, use it as-is
+  :
+elif [[ "$BACKEND" == "vulkan" ]]; then
+  DOCKER_IMAGE_TAG="llama-swap:vulkan"
+else
+  DOCKER_IMAGE_TAG="llama-swap:cuda"
+fi
+DOCKER_BUILDKIT="${DOCKER_BUILDKIT:-1}"
+
+# Single unified Dockerfile, backend selected via build arg
+DOCKERFILE="Dockerfile"
+if [[ "$BACKEND" == "vulkan" ]]; then
+  echo "Building for: Vulkan (AMD GPUs and compatible hardware)"
+else
+  echo "Building for: CUDA (NVIDIA GPUs)"
+fi
+
+# Git repository URLs
+LLAMA_REPO="https://github.com/ggml-org/llama.cpp.git"
+WHISPER_REPO="https://github.com/ggml-org/whisper.cpp.git"
+SD_REPO="https://github.com/leejet/stable-diffusion.cpp.git"
+
+# Function to get the latest commit hash from a git repo's default branch
+get_latest_commit() {
+  local repo_url="$1"
+  local branch="${2:-master}"
+
+  # Try to get the latest commit hash for the specified branch
+  git ls-remote --heads "${repo_url}" "${branch}" 2>/dev/null | head -1 | cut -f1
+}
+
+# Function to get the default branch name (master or main)
+get_default_branch() {
+  local repo_url="$1"
+
+  # Check for master first
+  if git ls-remote --heads "${repo_url}" master &>/dev/null; then
+    echo "master"
+  elif git ls-remote --heads "${repo_url}" main &>/dev/null; then
+    echo "main"
+  else
+    echo "master" # fallback
+  fi
+}
+
+# Function to get the latest release tag from a GitHub repo
+get_latest_release_tag() {
+  local owner_repo="$1"
+  curl -fsSL "https://api.github.com/repos/${owner_repo}/releases/latest" \
+    | grep '"tag_name"' | head -1 | cut -d'"' -f4
+}
+
+echo "=========================================="
+echo "llama-swap-docker Build Script"
+echo "=========================================="
+echo ""
+
+# Determine commit hashes / release tags - use env vars or auto-detect
+# For vulkan builds, llama and sd use GitHub release tags (prebuilt binaries).
+# For cuda builds (or whisper on any backend), use git commit hashes.
+if [[ -n "${LLAMA_COMMIT_HASH:-}" ]]; then
+  LLAMA_HASH="${LLAMA_COMMIT_HASH}"
+  echo "llama.cpp: Using provided version: ${LLAMA_HASH}"
+elif [[ "$BACKEND" == "vulkan" ]]; then
+  LLAMA_HASH=$(get_latest_release_tag "ggml-org/llama.cpp")
+  if [[ -z "${LLAMA_HASH}" ]]; then
+    echo "ERROR: Could not determine latest release tag for llama.cpp" >&2
+    exit 1
+  fi
+  echo "llama.cpp: Auto-detected latest release tag: ${LLAMA_HASH}"
+else
+  LLAMA_BRANCH=$(get_default_branch "${LLAMA_REPO}")
+  LLAMA_HASH=$(get_latest_commit "${LLAMA_REPO}" "${LLAMA_BRANCH}")
+  if [[ -z "${LLAMA_HASH}" ]]; then
+    echo "ERROR: Could not determine latest commit for llama.cpp" >&2
+    exit 1
+  fi
+  echo "llama.cpp: Auto-detected latest commit (${LLAMA_BRANCH}): ${LLAMA_HASH}"
+fi
+
+if [[ -n "${WHISPER_COMMIT_HASH:-}" ]]; then
+  WHISPER_HASH="${WHISPER_COMMIT_HASH}"
+  echo "whisper.cpp: Using provided commit hash: ${WHISPER_HASH}"
+else
+  WHISPER_BRANCH=$(get_default_branch "${WHISPER_REPO}")
+  WHISPER_HASH=$(get_latest_commit "${WHISPER_REPO}" "${WHISPER_BRANCH}")
+  if [[ -z "${WHISPER_HASH}" ]]; then
+    echo "ERROR: Could not determine latest commit for whisper.cpp" >&2
+    exit 1
+  fi
+  echo "whisper.cpp: Auto-detected latest commit (${WHISPER_BRANCH}): ${WHISPER_HASH}"
+fi
+
+if [[ -n "${SD_COMMIT_HASH:-}" ]]; then
+  SD_HASH="${SD_COMMIT_HASH}"
+  echo "stable-diffusion.cpp: Using provided version: ${SD_HASH}"
+elif [[ "$BACKEND" == "vulkan" ]]; then
+  SD_HASH=$(get_latest_release_tag "leejet/stable-diffusion.cpp")
+  if [[ -z "${SD_HASH}" ]]; then
+    echo "ERROR: Could not determine latest release tag for stable-diffusion.cpp" >&2
+    exit 1
+  fi
+  echo "stable-diffusion.cpp: Auto-detected latest release tag: ${SD_HASH}"
+else
+  SD_BRANCH=$(get_default_branch "${SD_REPO}")
+  SD_HASH=$(get_latest_commit "${SD_REPO}" "${SD_BRANCH}")
+  if [[ -z "${SD_HASH}" ]]; then
+    echo "ERROR: Could not determine latest commit for stable-diffusion.cpp" >&2
+    exit 1
+  fi
+  echo "stable-diffusion.cpp: Auto-detected latest commit (${SD_BRANCH}): ${SD_HASH}"
+fi
+
+echo ""
+echo "=========================================="
+echo "Starting Docker build..."
+echo "=========================================="
+echo ""
+
+# Build the Docker image with commit hashes as build args
+# Build context is the repository root (..) so the Dockerfile can access Go source
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
+BUILD_ARGS=(
+  --build-arg "BACKEND=${BACKEND}"
+  --build-arg "LLAMA_COMMIT_HASH=${LLAMA_HASH}"
+  --build-arg "WHISPER_COMMIT_HASH=${WHISPER_HASH}"
+  --build-arg "SD_COMMIT_HASH=${SD_HASH}"
+  -t "${DOCKER_IMAGE_TAG}"
+  -f "${SCRIPT_DIR}/${DOCKERFILE}"
+)
+
+if [[ "$NO_CACHE" == true ]]; then
+  BUILD_ARGS+=(--no-cache)
+  echo "Note: Building without cache"
+fi
+
+# Use docker buildx with a custom builder for parallelism control
+# The legacy DOCKER_BUILDKIT=1 docker build doesn't respect BUILDKIT_MAX_PARALLELISM env var
+# We need to use a custom builder with a buildkitd.toml config file
+BUILDER_NAME="llama-swap-builder"
+
+# Check if our custom builder exists with the right config, create/update if needed
+if ! docker buildx inspect "$BUILDER_NAME" >/dev/null 2>&1; then
+  echo "Creating custom buildx builder with max-parallelism=1..."
+
+  # Create buildkitd.toml config file
+  cat > buildkitd.toml << 'BUILDKIT_EOF'
+[worker.oci]
+  max-parallelism = 1
+BUILDKIT_EOF
+
+  # Create the builder with the config
+  docker buildx create --name "$BUILDER_NAME" \
+    --driver docker-container \
+    --buildkitd-config buildkitd.toml \
+    --use
+else
+  # Switch to our builder
+  docker buildx use "$BUILDER_NAME"
+fi
+
+echo "Building with sequential stages (one at a time), each using all CPU cores..."
+echo "Using builder: $BUILDER_NAME"
+
+# Use docker buildx build with --load to load the image into Docker
+# The --builder flag ensures we use our custom builder with max-parallelism=1
+# Build context is the repository root so we can access Go source files
+docker buildx build --builder "$BUILDER_NAME" --load "${BUILD_ARGS[@]}" "${REPO_ROOT}"
+
+echo ""
+echo "=========================================="
+echo "Verifying build artifacts..."
+echo "=========================================="
+echo ""
+
+# Verify all expected binaries exist in the image
+MISSING_BINARIES=()
+
+for binary in llama-server llama-cli whisper-server whisper-cli sd-server sd-cli llama-swap; do
+  if ! docker run --rm "${DOCKER_IMAGE_TAG}" which "${binary}" >/dev/null 2>&1; then
+    MISSING_BINARIES+=("${binary}")
+  fi
+done
+
+if [[ ${#MISSING_BINARIES[@]} -gt 0 ]]; then
+  echo "ERROR: Build succeeded but the following binaries are missing from the image:"
+  for binary in "${MISSING_BINARIES[@]}"; do
+    echo "  - ${binary}"
+  done
+  echo ""
+  echo "This usually indicates a build stage failure. Try running with --no-cache flag:"
+  echo "  ./build-image.sh --vulkan --no-cache"
+  exit 1
+fi
+
+echo "All expected binaries verified: llama-server, llama-cli, whisper-server, whisper-cli, sd-server, sd-cli, llama-swap"
+
+echo ""
+echo "=========================================="
+echo "Build complete!"
+echo "=========================================="
+echo ""
+echo "Image tag: ${DOCKER_IMAGE_TAG}"
+echo ""
+echo "Built with:"
+echo "  llama.cpp: ${LLAMA_HASH}"
+echo "  whisper.cpp: ${WHISPER_HASH}"
+echo "  stable-diffusion.cpp: ${SD_HASH}"
+echo "  llama-swap: $(docker run --rm "${DOCKER_IMAGE_TAG}" cat /versions.txt | grep llama-swap | cut -d' ' -f2-)"
+echo ""
+if [[ "$BACKEND" == "vulkan" ]]; then
+  echo "Run with:"
+  echo "  docker run -it --rm --device /dev/dri:/dev/dri ${DOCKER_IMAGE_TAG}"
+  echo ""
+  echo "Note: For AMD GPUs, you may also need to mount render devices:"
+  echo "  docker run -it --rm --device /dev/dri:/dev/dri --group-add video ${DOCKER_IMAGE_TAG}"
+else
+  echo "Run with:"
+  echo "  docker run -it --rm --gpus all ${DOCKER_IMAGE_TAG}"
+fi
diff --git a/docker/install.sh b/docker/install.sh
new file mode 100644
index 000000000..e4b6aaf02
--- /dev/null
+++ b/docker/install.sh
@@ -0,0 +1,156 @@
+#!/bin/bash
+# Usage: ./install.sh <backend> <project>
+#
+# For vulkan builds of llama and sd, downloads prebuilt binaries from GitHub
+# releases instead of building from source. Requires RELEASE_TAG env var.
+# whisper.cpp has no prebuilt vulkan binaries, so it always builds from source.
+set -e
+
+BACKEND="$1"
+PROJECT="$2"
+
+if [ -z "$BACKEND" ] || [ -z "$PROJECT" ]; then
+  echo "Usage: $0 <backend> <project>" >&2
+  exit 1
+fi
+
+mkdir -p /install/bin /install/lib
+
+# ---------------------------------------------------------------------------
+# Vulkan prebuilt binary download for llama and sd
+# ---------------------------------------------------------------------------
+if [ "$BACKEND" = "vulkan" ] && [ "$PROJECT" != "whisper" ]; then
+  if [ -z "${RELEASE_TAG:-}" ]; then
+    echo "ERROR: RELEASE_TAG env var required for vulkan prebuilt download of $PROJECT" >&2
+    exit 1
+  fi
+
+  TMPDIR=$(mktemp -d)
+  trap 'rm -rf "$TMPDIR"' EXIT
+
+  case "$PROJECT" in
+    llama)
+      # tag: b8429  asset: llama-b8429-bin-ubuntu-vulkan-x64.tar.gz
+      ASSET="llama-${RELEASE_TAG}-bin-ubuntu-vulkan-x64.tar.gz"
+      URL="https://github.com/ggml-org/llama.cpp/releases/download/${RELEASE_TAG}/${ASSET}"
+      echo "=== Downloading prebuilt llama.cpp vulkan binaries ==="
+      echo "URL: $URL"
+      curl -fSL -o "${TMPDIR}/release.tar.gz" "$URL"
+      tar xzf "${TMPDIR}/release.tar.gz" -C "${TMPDIR}"
+
+      find "${TMPDIR}" -name "llama-server" -type f -exec cp {} /install/bin/ \;
+      find "${TMPDIR}" -name "llama-cli" -type f -exec cp {} /install/bin/ \;
+      find "${TMPDIR}" -name "*.so*" -type f -exec cp {} /install/lib/ \;
+      EXPECTED_BINS="llama-server llama-cli"
+      ;;
+    sd)
+      # tag: master-536-5265a5e  asset: sd-master-5265a5e-bin-Linux-...-vulkan.zip
+      # The asset name drops the build number from the tag.
+      SD_BRANCH=$(echo "$RELEASE_TAG" | cut -d'-' -f1)
+      SD_HASH=$(echo "$RELEASE_TAG" | rev | cut -d'-' -f1 | rev)
+      ASSET="sd-${SD_BRANCH}-${SD_HASH}-bin-Linux-Ubuntu-24.04-x86_64-vulkan.zip"
+      URL="https://github.com/leejet/stable-diffusion.cpp/releases/download/${RELEASE_TAG}/${ASSET}"
+      echo "=== Downloading prebuilt sd.cpp vulkan binaries ==="
+      echo "URL: $URL"
+      curl -fSL -o "${TMPDIR}/release.zip" "$URL"
+      unzip -q "${TMPDIR}/release.zip" -d "${TMPDIR}"
+
+      # sd.cpp release names the CLI binary "sd", rename to sd-cli
+      if find "${TMPDIR}" -name "sd" -not -name "sd-*" -type f | grep -q .; then
+        find "${TMPDIR}" -name "sd" -not -name "sd-*" -type f -exec cp {} /install/bin/sd-cli \;
+      else
+        find "${TMPDIR}" -name "sd-cli" -type f -exec cp {} /install/bin/ \;
+      fi
+      find "${TMPDIR}" -name "sd-server" -type f -exec cp {} /install/bin/ \;
+      find "${TMPDIR}" -name "*.so*" -type f -exec cp {} /install/lib/ \;
+      EXPECTED_BINS="sd-cli sd-server"
+      ;;
+  esac
+
+  # Verify expected binaries were extracted
+  for bin in $EXPECTED_BINS; do
+    if [ ! -f "/install/bin/$bin" ]; then
+      echo "ERROR: $bin not found in downloaded release" >&2
+      echo "Archive contents:" >&2
+      find "${TMPDIR}" -type f >&2
+      exit 1
+    fi
+  done
+
+  chmod +x /install/bin/*
+  echo "=== $PROJECT prebuilt vulkan binaries installed ==="
+  ls -la /install/bin/
+  exit 0
+fi
+
+# ---------------------------------------------------------------------------
+# Build from source (cuda, or vulkan whisper)
+# ---------------------------------------------------------------------------
+COMMON_FLAGS="-DGGML_NATIVE=OFF -DCMAKE_BUILD_TYPE=Release"
+
+case "$BACKEND" in
+  cuda)
+    COMMON_FLAGS="$COMMON_FLAGS
+      -DGGML_CUDA=ON
+      -DGGML_VULKAN=OFF
+      -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES:-60;61;75;86;89}
+      -DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler
+      -DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda
+      -DCMAKE_SHARED_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda
+      -DCMAKE_C_COMPILER_LAUNCHER=ccache
+      -DCMAKE_CXX_COMPILER_LAUNCHER=ccache"
+    ;;
+  vulkan)
+    COMMON_FLAGS="$COMMON_FLAGS
+      -DGGML_VULKAN=ON
+      -DVulkan_INCLUDE_DIR=${VULKAN_SDK}/include
+      -DVulkan_LIBRARY=${VULKAN_SDK}/lib/libvulkan.so"
+    ;;
+  *)
+    echo "Unknown backend: $BACKEND" >&2
+    exit 1
+    ;;
+esac
+
+case "$PROJECT" in
+  llama)
+    PROJECT_FLAGS="-DLLAMA_BUILD_TESTS=OFF"
+    [ "$BACKEND" = "vulkan" ] && PROJECT_FLAGS="$PROJECT_FLAGS -DGGML_BACKEND_DL=ON"
+    TARGETS="llama-cli llama-server"
+    ;;
+  whisper)
+    PROJECT_FLAGS=""
+    TARGETS="whisper-cli whisper-server"
+    ;;
+  sd)
+    PROJECT_FLAGS="-DSD_BUILD_EXAMPLES=OFF"
+    [ "$BACKEND" = "cuda" ] && PROJECT_FLAGS="$PROJECT_FLAGS -DSD_CUDA=ON"
+    [ "$BACKEND" = "vulkan" ] && PROJECT_FLAGS="$PROJECT_FLAGS -DSD_VULKAN=ON"
+    TARGETS="sd-cli sd-server"
+    ;;
+  *)
+    echo "Unknown project: $PROJECT" >&2
+    exit 1
+    ;;
+esac
+
+rm -rf build/CMakeCache.txt build/CMakeFiles 2>/dev/null || true
+
+echo "=== Building $PROJECT for $BACKEND ==="
+
+# shellcheck disable=SC2086
+cmake -B build $COMMON_FLAGS $PROJECT_FLAGS
+# shellcheck disable=SC2086
+cmake --build build --config Release -j"$(nproc)" --target $TARGETS
+
+for bin in $TARGETS; do
+  if [ ! -f "build/bin/$bin" ]; then
+    echo "FATAL: $bin not found in build/bin/" >&2
+    exit 1
+  fi
+  cp "build/bin/$bin" "/install/bin/"
+done
+find build -name "*.so*" -type f -exec cp {} /install/lib/ \;
+
+echo "=== $PROJECT build complete ==="
+ls -la /install/bin/
diff --git a/docker/test-binaries.sh b/docker/test-binaries.sh
new file mode 100755
index 000000000..1cced0a8b
--- /dev/null
+++ b/docker/test-binaries.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+# Test script for verifying GPU-accelerated binaries work correctly
+# Supports both CUDA and Vulkan backends, auto-detecting the environment
+
+set -e
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+print_info() {
+  echo -e "${GREEN}[INFO]${NC} $1"
+}
+
+print_warn() {
+  echo -e "${YELLOW}[WARN]${NC} $1"
+}
+
+print_error() {
+  echo -e "${RED}[ERROR]${NC} $1"
+}
+
+# Detect if real NVIDIA drivers are available
+detect_cuda_drivers() {
+  local real_driver_paths=(
+    "/lib/x86_64-linux-gnu/libcuda.so.1"
+    "/usr/lib/x86_64-linux-gnu/libcuda.so.1"
+    "/usr/local/cuda/lib64/libcuda.so.1"
+  )
+
+  for path in "${real_driver_paths[@]}"; do
+    if [ -f "$path" ]; then
+      print_info "Real NVIDIA drivers found at: $path"
+      return 0
+    fi
+  done
+
+  return 1
+}
+
+# Detect Vulkan ICD availability
+detect_vulkan() {
+  if [ -d "/usr/share/vulkan/icd.d" ] && ls /usr/share/vulkan/icd.d/*.json >/dev/null 2>&1; then
+    print_info "Vulkan ICDs found:"
+    ls /usr/share/vulkan/icd.d/*.json 2>/dev/null | while read -r f; do echo "  $f"; done
+    return 0
+  fi
+  return 1
+}
+
+# Main execution
+print_info "Starting binary tests..."
+
+# Set up GPU library environment
+if detect_cuda_drivers; then
+  print_info "Using real NVIDIA drivers"
+  export LD_LIBRARY_PATH="/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+elif [ -d "/usr/local/cuda/lib64/stubs" ]; then
+  print_warn "No real NVIDIA drivers detected"
+  print_warn "Falling back to stub drivers for testing"
+  print_warn "GPU functionality will NOT be available"
+  export LD_LIBRARY_PATH="/usr/local/cuda/lib64/stubs:/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+  print_info "LD_LIBRARY_PATH set to: $LD_LIBRARY_PATH"
+elif detect_vulkan; then
+  print_info "Vulkan backend detected"
+else
+  print_warn "No GPU drivers detected (CPU-only mode)"
+fi
+
+# Test all expected server binaries
+BINARIES=(llama-server whisper-server sd-server)
+FAILED=0
+
+for binary in "${BINARIES[@]}"; do
+  print_info "Testing ${binary}..."
+  if command -v "$binary" &> /dev/null; then
+    if "$binary" --help > /dev/null 2>&1 || "$binary" -h > /dev/null 2>&1; then
+      print_info "  $binary: OK"
+    else
+      print_error "  $binary: Failed to run"
+      FAILED=1
+    fi
+  else
+    print_error "  $binary: Not found in PATH"
+    FAILED=1
+  fi
+done
+
+if [ "$FAILED" -ne 0 ]; then
+  print_error "Some binary tests failed!"
+  exit 1
+fi
+
+print_info "All binary tests passed!"
+
+# Additional environment info
+print_info "Environment information:"
+echo "  LD_LIBRARY_PATH: ${LD_LIBRARY_PATH:-not set}"
+echo "  CUDA_VISIBLE_DEVICES: ${CUDA_VISIBLE_DEVICES:-not set}"
+
+if command -v nvidia-smi &> /dev/null; then
+  print_info "nvidia-smi output:"
+  nvidia-smi --query-gpu=name,driver_version,memory.total --format=csv,noheader 2>/dev/null || \
+    print_warn "nvidia-smi found but could not query GPU information"
+elif command -v vulkaninfo &> /dev/null; then
+  print_info "Vulkan device info:"
+  vulkaninfo --summary 2>/dev/null | head -20 || \
+    print_warn "vulkaninfo found but could not query device information"
+else
+  print_warn "No GPU query tools available (expected on CPU-only hosts)"
+fi
+
+exit 0