From 860ec05ff55a12249d92ac50890a46c1462215e3 Mon Sep 17 00:00:00 2001
From: Benson Wong
Date: Tue, 24 Mar 2026 13:02:25 +0000
Subject: [PATCH] .github,docker/unified: include vulkan build

Update the docker/unified scripts and the unified-docker workflow to
support building both CUDA and Vulkan unified images.
---
 .github/workflows/unified-docker.yml |  17 +++--
 docker/unified/Dockerfile            | 112 +++++++++++++++++++-------
 docker/unified/build-image.sh        |  63 +++++++++++-----
 docker/unified/install-llama.sh      |  31 +++++---
 docker/unified/install-sd.sh         |  33 ++++++---
 docker/unified/install-whisper.sh    |  31 +++++---
 6 files changed, 207 insertions(+), 80 deletions(-)

diff --git a/.github/workflows/unified-docker.yml b/.github/workflows/unified-docker.yml
index 31b45703..b6e168ec 100644
--- a/.github/workflows/unified-docker.yml
+++ b/.github/workflows/unified-docker.yml
@@ -30,6 +30,10 @@ permissions:
 jobs:
   build:
     runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        backend: [cuda, vulkan]
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
@@ -61,24 +65,25 @@ jobs:
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}

-      - name: Build unified Docker image
+      - name: Build unified Docker image (${{ matrix.backend }})
        env:
           LLAMA_REF: ${{ inputs.llama_cpp_ref || 'master' }}
           WHISPER_REF: ${{ inputs.whisper_ref || 'master' }}
           SD_REF: ${{ inputs.sd_ref || 'master' }}
           LS_VERSION: ${{ inputs.llama_swap_version || 'main' }}
-          DOCKER_IMAGE_TAG: ghcr.io/mostlygeek/llama-swap:unified
+          DOCKER_IMAGE_TAG: ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}
           # When running under act, use the local builder that has warm ccache.
           # On GitHub Actions, BUILDX_BUILDER is unset so docker uses the builder
           # created by setup-buildx-action above.
           BUILDX_BUILDER: ${{ env.ACT == 'true' && 'llama-swap-builder' || '' }}
         run: |
           chmod +x docker/unified/build-image.sh
-          docker/unified/build-image.sh
+          docker/unified/build-image.sh --${{ matrix.backend }}

       - name: Push to GitHub Container Registry
         if: ${{ !env.ACT }}
         run: |
-          docker push ghcr.io/mostlygeek/llama-swap:unified
-          docker tag ghcr.io/mostlygeek/llama-swap:unified ghcr.io/mostlygeek/llama-swap:unified-$(date -u +%Y-%m-%d)
-          docker push ghcr.io/mostlygeek/llama-swap:unified-$(date -u +%Y-%m-%d)
+          docker push ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}
+          DATE_TAG=$(date -u +%Y-%m-%d)
+          docker tag ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }} ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}-${DATE_TAG}
+          docker push ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}-${DATE_TAG}

diff --git a/docker/unified/Dockerfile b/docker/unified/Dockerfile
index a6ba3937..961ccecf 100644
--- a/docker/unified/Dockerfile
+++ b/docker/unified/Dockerfile
@@ -1,14 +1,23 @@
-# Unified multi-stage Dockerfile for CUDA-accelerated AI inference tools
-# Includes: llama.cpp, whisper.cpp, stable-diffusion.cpp, llama-swap
+# Unified multi-stage Dockerfile for AI inference tools
+# Supports CUDA and Vulkan backends via BACKEND build arg
 #
 # Usage:
-#   docker buildx build -t llama-swap:unified .
+#   docker buildx build --build-arg BACKEND=cuda -t llama-swap:unified-cuda .
+#   docker buildx build --build-arg BACKEND=vulkan -t llama-swap:unified-vulkan .
 #
 # Each project has its own install script that handles cloning, building,
 # and installing binaries. Build stages are independent for cache efficiency.
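+#
+# Run examples (illustrative only; these mirror the "Run with" hints
+# printed by build-image.sh and may need adjusting for your host GPU):
+#   docker run -it --rm --gpus all llama-swap:unified-cuda
+#   docker run -it --rm --device /dev/dri:/dev/dri --group-add video llama-swap:unified-vulkan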
-# Builder base: CUDA devel image with build tools -FROM nvidia/cuda:12.4.0-devel-ubuntu22.04 AS builder-base +ARG BACKEND=cuda + +# ── Builder bases ────────────────────────────────────────────────────── + +FROM nvidia/cuda:12.4.0-devel-ubuntu22.04 AS builder-base-cuda ENV DEBIAN_FRONTEND=noninteractive ENV CMAKE_CUDA_ARCHITECTURES="60;61;75;86;89" @@ -23,42 +27,72 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ WORKDIR /build -# Build whisper.cpp (fastest build, run first) +# ── + +FROM ubuntu:22.04 AS builder-base-vulkan + +ENV DEBIAN_FRONTEND=noninteractive +ENV CCACHE_DIR=/ccache +ENV CCACHE_MAXSIZE=2G +ENV PATH="/usr/lib/ccache:${PATH}" + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential cmake git python3 python3-pip libssl-dev \ + curl ca-certificates ccache make wget software-properties-common \ + && rm -rf /var/lib/apt/lists/* + +# Install LunarG Vulkan SDK (Ubuntu 22.04 repo headers are too old) +RUN wget -qO /etc/apt/trusted.gpg.d/lunarg.asc https://packages.lunarg.com/lunarg-signing-key-pub.asc \ + && echo "deb https://packages.lunarg.com/vulkan jammy main" > /etc/apt/sources.list.d/lunarg-vulkan.list \ + && apt-get update && apt-get install -y --no-install-recommends vulkan-sdk \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /build + +# ── Select builder base by BACKEND ──────────────────────────────────── + +FROM builder-base-${BACKEND} AS builder-base + +# ── Build whisper.cpp (fastest build, run first) ────────────────────── + FROM builder-base AS whisper-build +ARG BACKEND=cuda ARG WHISPER_COMMIT_HASH=master COPY install-whisper.sh /build/ -RUN --mount=type=cache,id=ccache-cuda,target=/ccache \ - --mount=type=cache,id=whisper-cuda,target=/src/whisper.cpp/build \ - bash /build/install-whisper.sh "${WHISPER_COMMIT_HASH}" +RUN --mount=type=cache,id=ccache-${BACKEND},target=/ccache \ + --mount=type=cache,id=whisper-${BACKEND},target=/src/whisper.cpp/build \ + BACKEND=${BACKEND} bash /build/install-whisper.sh "${WHISPER_COMMIT_HASH}" + +# ── Build stable-diffusion.cpp ──────────────────────────────────────── -# Build stable-diffusion.cpp FROM builder-base AS sd-build +ARG BACKEND=cuda ARG SD_COMMIT_HASH=master COPY install-sd.sh /build/ -RUN --mount=type=cache,id=ccache-cuda,target=/ccache \ - --mount=type=cache,id=sd-cuda,target=/src/stable-diffusion.cpp/build \ - bash /build/install-sd.sh "${SD_COMMIT_HASH}" +RUN --mount=type=cache,id=ccache-${BACKEND},target=/ccache \ + --mount=type=cache,id=sd-${BACKEND},target=/src/stable-diffusion.cpp/build \ + BACKEND=${BACKEND} bash /build/install-sd.sh "${SD_COMMIT_HASH}" + +# ── Build llama.cpp (slowest build, run last) ───────────────────────── -# Build llama.cpp (slowest build, run last) FROM builder-base AS llama-build +ARG BACKEND=cuda ARG LLAMA_COMMIT_HASH=master COPY install-llama.sh /build/ -RUN --mount=type=cache,id=ccache-cuda,target=/ccache \ - --mount=type=cache,id=llama-cuda,target=/src/llama.cpp/build \ - bash /build/install-llama.sh "${LLAMA_COMMIT_HASH}" +RUN --mount=type=cache,id=ccache-${BACKEND},target=/ccache \ + --mount=type=cache,id=llama-${BACKEND},target=/src/llama.cpp/build \ + BACKEND=${BACKEND} bash /build/install-llama.sh "${LLAMA_COMMIT_HASH}" + +# ── Download llama-swap release binary ──────────────────────────────── -# Download llama-swap release binary FROM builder-base AS llama-swap-download ARG LS_VERSION=latest COPY install-llama-swap.sh /build/ RUN bash /build/install-llama-swap.sh "${LS_VERSION}" -# Runtime image (no build tooling) -FROM 
nvidia/cuda:12.4.0-runtime-ubuntu22.04 AS runtime +# ── Runtime bases ───────────────────────────────────────────────────── -ARG LLAMA_COMMIT_HASH=unknown -ARG WHISPER_COMMIT_HASH=unknown -ARG SD_COMMIT_HASH=unknown +FROM nvidia/cuda:12.4.0-runtime-ubuntu22.04 AS runtime-cuda ENV DEBIAN_FRONTEND=noninteractive ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" @@ -69,8 +103,29 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ && rm -rf /var/lib/apt/lists/* # CUDA stub drivers for container compatibility -COPY --from=builder-base /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so -COPY --from=builder-base /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 +COPY --from=builder-base-cuda /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so +COPY --from=builder-base-cuda /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 + +# ── + +FROM ubuntu:22.04 AS runtime-vulkan + +ENV DEBIAN_FRONTEND=noninteractive +ENV PATH="/usr/local/bin:${PATH}" + +RUN apt-get update && apt-get install -y --no-install-recommends \ + libgomp1 libvulkan1 mesa-vulkan-drivers \ + python3 python3-pip curl ca-certificates git \ + && rm -rf /var/lib/apt/lists/* + +# ── Select runtime base by BACKEND ──────────────────────────────────── + +FROM runtime-${BACKEND} AS runtime + +ARG BACKEND=cuda +ARG LLAMA_COMMIT_HASH=unknown +ARG WHISPER_COMMIT_HASH=unknown +ARG SD_COMMIT_HASH=unknown RUN pip3 install --no-cache-dir numpy sentencepiece @@ -109,7 +164,7 @@ RUN echo "llama.cpp: ${LLAMA_COMMIT_HASH}" > /versions.txt && \ echo "whisper.cpp: ${WHISPER_COMMIT_HASH}" >> /versions.txt && \ echo "stable-diffusion.cpp: ${SD_COMMIT_HASH}" >> /versions.txt && \ echo "llama-swap: $(cat /tmp/llama-swap-version)" >> /versions.txt && \ - echo "backend: cuda" >> /versions.txt && \ + echo "backend: ${BACKEND}" >> /versions.txt && \ echo "build_timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> /versions.txt WORKDIR /models diff --git a/docker/unified/build-image.sh b/docker/unified/build-image.sh index 408ecb7d..7eae9808 100755 --- a/docker/unified/build-image.sh +++ b/docker/unified/build-image.sh @@ -1,32 +1,45 @@ #!/bin/bash # -# Build script for unified CUDA container with version pinning +# Build script for unified container with version pinning # # Usage: -# ./build-image.sh # Build with auto-detected versions -# ./build-image.sh --no-cache # Build without cache -# LLAMA_REF=b1234 ./build-image.sh # Pin llama.cpp to a commit hash -# LLAMA_REF=v1.2.3 ./build-image.sh # Pin llama.cpp to a tag -# LLAMA_REF=my-branch ./build-image.sh # Pin llama.cpp to a branch -# WHISPER_REF=v1.0.0 ./build-image.sh # Pin whisper.cpp to a tag -# SD_REF=master ./build-image.sh # Pin stable-diffusion.cpp to a branch -# LS_VERSION=170 ./build-image.sh # Override llama-swap version +# ./build-image.sh --cuda # Build CUDA image +# ./build-image.sh --vulkan # Build Vulkan image +# ./build-image.sh --cuda --no-cache # Build without cache +# LLAMA_REF=b1234 ./build-image.sh --vulkan # Pin llama.cpp to a commit hash +# LLAMA_REF=v1.2.3 ./build-image.sh --cuda # Pin llama.cpp to a tag +# WHISPER_REF=v1.0.0 ./build-image.sh --vulkan # Pin whisper.cpp to a tag +# SD_REF=master ./build-image.sh --cuda # Pin stable-diffusion.cpp to a branch +# LS_VERSION=170 ./build-image.sh --cuda # Override llama-swap version # set -euo pipefail +BACKEND="" NO_CACHE=false for arg in "$@"; do case $arg in + --cuda) + BACKEND="cuda" + ;; + --vulkan) + 
BACKEND="vulkan" + ;; --no-cache) NO_CACHE=true ;; --help|-h) - echo "Usage: ./build-image.sh [--no-cache]" + echo "Usage: ./build-image.sh --cuda|--vulkan [--no-cache]" + echo "" + echo "Options:" + echo " --cuda Build CUDA image (NVIDIA GPUs)" + echo " --vulkan Build Vulkan image (AMD GPUs and compatible hardware)" + echo " --no-cache Force rebuild without using Docker cache" + echo " --help, -h Show this help message" echo "" echo "Environment variables:" - echo " DOCKER_IMAGE_TAG Set custom image tag (default: llama-swap:unified)" + echo " DOCKER_IMAGE_TAG Set custom image tag (default: llama-swap:unified-cuda or llama-swap:unified-vulkan)" echo " LLAMA_REF Pin llama.cpp to a commit, tag, or branch" echo " WHISPER_REF Pin whisper.cpp to a commit, tag, or branch" echo " SD_REF Pin stable-diffusion.cpp to a commit, tag, or branch" @@ -36,7 +49,14 @@ for arg in "$@"; do esac done -DOCKER_IMAGE_TAG="${DOCKER_IMAGE_TAG:-llama-swap:unified}" +if [[ -z "$BACKEND" ]]; then + echo "Error: No backend specified. Please use --cuda or --vulkan." + echo "" + echo "Usage: ./build-image.sh --cuda|--vulkan [--no-cache]" + exit 1 +fi + +DOCKER_IMAGE_TAG="${DOCKER_IMAGE_TAG:-llama-swap:unified-${BACKEND}}" # Git repository URLs LLAMA_REPO="https://github.com/ggml-org/llama.cpp.git" @@ -89,7 +109,7 @@ get_latest_hash() { } echo "==========================================" -echo "llama-swap Unified CUDA Build" +echo "llama-swap Unified Build (${BACKEND})" echo "==========================================" echo "" @@ -154,6 +174,7 @@ echo "" SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" BUILD_ARGS=( + --build-arg "BACKEND=${BACKEND}" --build-arg "LLAMA_COMMIT_HASH=${LLAMA_HASH}" --build-arg "WHISPER_COMMIT_HASH=${WHISPER_HASH}" --build-arg "SD_COMMIT_HASH=${SD_HASH}" @@ -166,7 +187,7 @@ if [[ "$NO_CACHE" == true ]]; then BUILD_ARGS+=(--no-cache) echo "Note: Building without cache" elif [[ "${GITHUB_ACTIONS:-}" == "true" && "${ACT:-}" != "true" ]]; then - CACHE_REF="ghcr.io/mostlygeek/llama-swap:unified-cache" + CACHE_REF="ghcr.io/mostlygeek/llama-swap:unified-${BACKEND}-cache" BUILD_ARGS+=( --cache-from "type=registry,ref=${CACHE_REF}" --cache-to "type=registry,ref=${CACHE_REF},mode=max" @@ -196,7 +217,7 @@ if [[ ${#MISSING_BINARIES[@]} -gt 0 ]]; then done echo "" echo "Try running with --no-cache flag:" - echo " ./build-image.sh --no-cache" + echo " ./build-image.sh --${BACKEND} --no-cache" exit 1 fi @@ -215,5 +236,13 @@ echo " whisper.cpp: ${WHISPER_HASH}" echo " stable-diffusion.cpp: ${SD_HASH}" echo " llama-swap: $(docker run --rm --entrypoint cat "${DOCKER_IMAGE_TAG}" /versions.txt | grep llama-swap | cut -d' ' -f2-)" echo "" -echo "Run with:" -echo " docker run -it --rm --gpus all ${DOCKER_IMAGE_TAG}" +if [[ "$BACKEND" == "vulkan" ]]; then + echo "Run with:" + echo " docker run -it --rm --device /dev/dri:/dev/dri ${DOCKER_IMAGE_TAG}" + echo "" + echo "Note: For AMD GPUs, you may also need:" + echo " docker run -it --rm --device /dev/dri:/dev/dri --group-add video ${DOCKER_IMAGE_TAG}" +else + echo "Run with:" + echo " docker run -it --rm --gpus all ${DOCKER_IMAGE_TAG}" +fi diff --git a/docker/unified/install-llama.sh b/docker/unified/install-llama.sh index c5c0847f..64ca43ca 100755 --- a/docker/unified/install-llama.sh +++ b/docker/unified/install-llama.sh @@ -1,9 +1,10 @@ #!/bin/bash -# Install llama.cpp - clone, build with CUDA, and install binaries -# Usage: ./install-llama.sh +# Install llama.cpp - clone, build, and install binaries +# Usage: BACKEND=cuda|vulkan ./install-llama.sh set 
-e COMMIT_HASH="${1:-master}" +BACKEND="${BACKEND:-cuda}" mkdir -p /install/bin /install/lib @@ -18,26 +19,36 @@ fi git fetch --depth=1 origin "${COMMIT_HASH}" git checkout FETCH_HEAD -# CUDA cmake flags + llama-specific flags +# Common cmake flags CMAKE_FLAGS=( -DGGML_NATIVE=OFF -DCMAKE_BUILD_TYPE=Release - -DGGML_CUDA=ON - -DGGML_VULKAN=OFF - "-DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES:-60;61;75;86;89}" - "-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler" - "-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda" - "-DCMAKE_SHARED_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda" -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DLLAMA_BUILD_TESTS=OFF ) +if [ "$BACKEND" = "cuda" ]; then + CMAKE_FLAGS+=( + -DGGML_CUDA=ON + -DGGML_VULKAN=OFF + "-DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES:-60;61;75;86;89}" + "-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler" + "-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda" + "-DCMAKE_SHARED_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda" + ) +elif [ "$BACKEND" = "vulkan" ]; then + CMAKE_FLAGS+=( + -DGGML_CUDA=OFF + -DGGML_VULKAN=ON + ) +fi + TARGETS=(llama-cli llama-server) rm -rf build/CMakeCache.txt build/CMakeFiles 2>/dev/null || true -echo "=== Building llama.cpp for CUDA ===" +echo "=== Building llama.cpp for ${BACKEND} ===" cmake -B build "${CMAKE_FLAGS[@]}" cmake --build build --config Release -j"$(nproc)" --target "${TARGETS[@]}" diff --git a/docker/unified/install-sd.sh b/docker/unified/install-sd.sh index 48b36c68..2a781465 100755 --- a/docker/unified/install-sd.sh +++ b/docker/unified/install-sd.sh @@ -1,9 +1,10 @@ #!/bin/bash -# Install stable-diffusion.cpp - clone and build with CUDA, install binaries and library -# Usage: ./install-sd.sh +# Install stable-diffusion.cpp - clone, build, and install binaries and library +# Usage: BACKEND=cuda|vulkan ./install-sd.sh set -e COMMIT_HASH="${1:-master}" +BACKEND="${BACKEND:-cuda}" mkdir -p /install/bin /install/lib @@ -19,27 +20,37 @@ git fetch --depth=1 origin "${COMMIT_HASH}" git checkout FETCH_HEAD git submodule update --init --recursive --depth=1 -# CUDA cmake flags + sd-specific flags +# Common cmake flags CMAKE_FLAGS=( -DGGML_NATIVE=OFF -DCMAKE_BUILD_TYPE=Release - -DGGML_CUDA=ON - -DGGML_VULKAN=OFF - "-DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES:-60;61;75;86;89}" - "-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler" - "-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda" - "-DCMAKE_SHARED_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda" -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DSD_BUILD_EXAMPLES=ON - -DSD_CUDA=ON ) +if [ "$BACKEND" = "cuda" ]; then + CMAKE_FLAGS+=( + -DGGML_CUDA=ON + -DGGML_VULKAN=OFF + "-DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES:-60;61;75;86;89}" + "-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler" + "-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda" + "-DCMAKE_SHARED_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda" + -DSD_CUDA=ON + ) +elif [ "$BACKEND" = "vulkan" ]; then + CMAKE_FLAGS+=( + -DGGML_CUDA=OFF + -DGGML_VULKAN=ON + ) +fi + TARGETS=(stable-diffusion sd-cli sd-server) rm -rf build/CMakeCache.txt build/CMakeFiles 2>/dev/null || true -echo "=== Building stable-diffusion.cpp for CUDA ===" +echo "=== Building stable-diffusion.cpp for ${BACKEND} ===" cmake -B build "${CMAKE_FLAGS[@]}" cmake --build build --config Release 
-j"$(nproc)" --target "${TARGETS[@]}" diff --git a/docker/unified/install-whisper.sh b/docker/unified/install-whisper.sh index da18b096..74d38c90 100755 --- a/docker/unified/install-whisper.sh +++ b/docker/unified/install-whisper.sh @@ -1,9 +1,10 @@ #!/bin/bash -# Install whisper.cpp - clone, build with CUDA, and install binaries -# Usage: ./install-whisper.sh +# Install whisper.cpp - clone, build, and install binaries +# Usage: BACKEND=cuda|vulkan ./install-whisper.sh set -e COMMIT_HASH="${1:-master}" +BACKEND="${BACKEND:-cuda}" mkdir -p /install/bin /install/lib @@ -18,25 +19,35 @@ fi git fetch --depth=1 origin "${COMMIT_HASH}" git checkout FETCH_HEAD -# CUDA cmake flags +# Common cmake flags CMAKE_FLAGS=( -DGGML_NATIVE=OFF -DCMAKE_BUILD_TYPE=Release - -DGGML_CUDA=ON - -DGGML_VULKAN=OFF - "-DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES:-60;61;75;86;89}" - "-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler" - "-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda" - "-DCMAKE_SHARED_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda" -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache ) +if [ "$BACKEND" = "cuda" ]; then + CMAKE_FLAGS+=( + -DGGML_CUDA=ON + -DGGML_VULKAN=OFF + "-DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES:-60;61;75;86;89}" + "-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler" + "-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda" + "-DCMAKE_SHARED_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda" + ) +elif [ "$BACKEND" = "vulkan" ]; then + CMAKE_FLAGS+=( + -DGGML_CUDA=OFF + -DGGML_VULKAN=ON + ) +fi + TARGETS=(whisper-cli whisper-server) rm -rf build/CMakeCache.txt build/CMakeFiles 2>/dev/null || true -echo "=== Building whisper.cpp for CUDA ===" +echo "=== Building whisper.cpp for ${BACKEND} ===" cmake -B build "${CMAKE_FLAGS[@]}" cmake --build build --config Release -j"$(nproc)" --target "${TARGETS[@]}"