From d83e6c549ab51f05ebbae3108a32bc1d9ad16090 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 27 May 2026 20:48:16 +0300 Subject: [PATCH 01/13] ci : separate CUDA windows workflow + fix names --- .../{build-hip.yml => build-cuda-windows.yml} | 125 +++++++--------- .github/workflows/build-cuda.yml | 134 ++++++++++++++++++ .github/workflows/build-opencl.yml | 4 +- .github/workflows/build-openvino.yml | 2 +- .github/workflows/build-vulkan.yml | 51 ++++++- .github/workflows/build-webgpu.yml | 20 ++- .github/workflows/build.yml | 130 +---------------- .github/workflows/release.yml | 24 ++-- .github/workflows/server.yml | 6 +- 9 files changed, 270 insertions(+), 226 deletions(-) rename .github/workflows/{build-hip.yml => build-cuda-windows.yml} (56%) create mode 100644 .github/workflows/build-cuda.yml diff --git a/.github/workflows/build-hip.yml b/.github/workflows/build-cuda-windows.yml similarity index 56% rename from .github/workflows/build-hip.yml rename to .github/workflows/build-cuda-windows.yml index ff8283ae7b2..052cdf6d967 100644 --- a/.github/workflows/build-hip.yml +++ b/.github/workflows/build-cuda-windows.yml @@ -1,28 +1,11 @@ -name: CI (hip) +name: CI (CUDA, windows) +# TODO: this workflow is only triggered manually because it is very heavy on the CI +# when we provision dedicated windows runners, we can enable it for pushes too +# note: running this workflow manually will populate the ccache for the builds +# this can be used to speed up the release builds on: workflow_dispatch: # allows manual triggering - push: - branches: - - master - paths: [ - '.github/workflows/build-hip.yml', - '**/CMakeLists.txt', - '**/.cmake', - '**/*.h', - '**/*.hpp', - '**/*.c', - '**/*.cpp', - '**/*.cu', - '**/*.cuh' - ] - - pull_request: - types: [opened, synchronize, reopened] - paths: [ - '.github/workflows/build-hip.yml', - 'ggml/src/ggml-cuda/**' - ] concurrency: group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} @@ -36,46 +19,70 @@ env: LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: + cuda: + runs-on: windows-2022 - ubuntu-22-hip: - runs-on: ubuntu-22.04 - container: rocm/dev-ubuntu-22.04:6.1.2 + strategy: + matrix: + cuda: ['12.4', '13.3'] steps: - name: Clone id: checkout uses: actions/checkout@v6 - - name: Dependencies - id: depends - run: | - sudo apt-get update - sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev libssl-dev rocwmma-dev - - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-22-hip + key: windows-2022-cuda-${{ matrix.cuda }} + variant: ccache evict-old-files: 1d - save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + save: true # always save the cache since we are running this manually + #save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Install Cuda Toolkit + uses: ./.github/actions/windows-setup-cuda + with: + cuda_version: ${{ matrix.cuda }} + + - name: Install Ninja + id: install_ninja + run: | + choco install ninja - - name: Build with native CMake HIP support + - name: Build id: cmake_build + shell: cmd + # TODO: Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled within CTK and that CTK version is used in this project run: | - cmake -B build -S . \ - -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \ - -DGGML_HIP_ROCWMMA_FATTN=ON \ - -DGPU_TARGETS="gfx1030" \ - -DGGML_HIP=ON - cmake --build build --config Release -j $(nproc) - - windows-latest-hip: + call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 + cmake -S . -B build -G "Ninja Multi-Config" ^ + -DLLAMA_BUILD_SERVER=ON ^ + -DLLAMA_BUILD_BORINGSSL=ON ^ + -DGGML_NATIVE=OFF ^ + -DGGML_BACKEND_DL=ON ^ + -DGGML_CPU_ALL_VARIANTS=ON ^ + -DGGML_CUDA=ON ^ + -DGGML_RPC=ON ^ + -DGGML_CUDA_CUB_3DOT2=ON + set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1 + cmake --build build --config Release -j %NINJA_JOBS% -t ggml + cmake --build build --config Release + + hip: runs-on: windows-2022 env: # Make sure this is in sync with build-cache.yml HIPSDK_INSTALLER_VERSION: "26.Q1" + strategy: + matrix: + include: + # sync with release.yml + - name: "radeon" + gpu_targets: "gfx1150;gfx1151;gfx1200;gfx1201;gfx1100;gfx1101;gfx1102;gfx1030;gfx1031;gfx1032" + steps: - name: Clone id: checkout @@ -115,9 +122,10 @@ jobs: - name: Install ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ${{ github.job }} + key: windows-2022-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}-x64 evict-old-files: 1d - save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + save: true # always save the cache since we are running this manually + #save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - name: Build id: cmake_build @@ -136,32 +144,3 @@ jobs: -DGPU_TARGETS="gfx1100" ` -DGGML_RPC=ON cmake --build build -j ${env:NUMBER_OF_PROCESSORS} - - ubuntu-22-musa: - runs-on: ubuntu-22.04 - container: mthreads/musa:rc4.3.0-devel-ubuntu22.04-amd64 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: Dependencies - id: depends - run: | - apt-get update - apt-get install -y build-essential git cmake libssl-dev - - - name: ccache - uses: ggml-org/ccache-action@v1.2.21 - with: - key: ubuntu-22-musa - evict-old-files: 1d - save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - - - name: Build with native CMake MUSA support - id: cmake_build - run: | - cmake -B build -S . \ - -DGGML_MUSA=ON - time cmake --build build --config Release -j $(nproc) diff --git a/.github/workflows/build-cuda.yml b/.github/workflows/build-cuda.yml new file mode 100644 index 00000000000..e5e3d09965d --- /dev/null +++ b/.github/workflows/build-cuda.yml @@ -0,0 +1,134 @@ +name: CI (CUDA, ubuntu) + +on: + workflow_dispatch: # allows manual triggering + push: + branches: + - master + paths: [ + '.github/workflows/build-hip.yml', + '**/CMakeLists.txt', + '**/.cmake', + '**/*.h', + '**/*.hpp', + '**/*.c', + '**/*.cpp', + '**/*.cu', + '**/*.cuh' + ] + + pull_request: + types: [opened, synchronize, reopened] + paths: [ + '.github/workflows/build-hip.yml', + 'ggml/src/ggml-cuda/**' + ] + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +env: + GGML_NLOOP: 3 + GGML_N_THREADS: 1 + LLAMA_ARG_LOG_COLORS: 1 + LLAMA_ARG_LOG_PREFIX: 1 + LLAMA_ARG_LOG_TIMESTAMPS: 1 + +jobs: + cuda: + runs-on: ubuntu-24.04 + container: nvidia/cuda:12.6.2-devel-ubuntu24.04 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Install dependencies + env: + DEBIAN_FRONTEND: noninteractive + run: | + apt update + apt install -y cmake build-essential ninja-build libgomp1 git libssl-dev + + - name: ccache + uses: ggml-org/ccache-action@v1.2.21 + with: + key: ubuntu-24.04-cuda + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Build with CMake + # TODO: Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled within CTK and that CTK version is used in this project + run: | + cmake -S . -B build -G Ninja \ + -DLLAMA_FATAL_WARNINGS=ON \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_CUDA_ARCHITECTURES=89-real \ + -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \ + -DGGML_NATIVE=OFF \ + -DGGML_CUDA=ON \ + -DGGML_CUDA_CUB_3DOT2=ON + cmake --build build + + hip: + runs-on: ubuntu-22.04 + container: rocm/dev-ubuntu-22.04:6.1.2 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev libssl-dev rocwmma-dev + + - name: ccache + uses: ggml-org/ccache-action@v1.2.21 + with: + key: ubuntu-22.04-hip + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Build with native CMake HIP support + id: cmake_build + run: | + cmake -B build -S . \ + -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \ + -DGGML_HIP_ROCWMMA_FATTN=ON \ + -DGPU_TARGETS="gfx1030" \ + -DGGML_HIP=ON + cmake --build build --config Release -j $(nproc) + + musa: + runs-on: ubuntu-22.04 + container: mthreads/musa:rc4.3.0-devel-ubuntu22.04-amd64 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Dependencies + id: depends + run: | + apt-get update + apt-get install -y build-essential git cmake libssl-dev + + - name: ccache + uses: ggml-org/ccache-action@v1.2.21 + with: + key: ubuntu-22.04-musa + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Build with native CMake MUSA support + id: cmake_build + run: | + cmake -B build -S . \ + -DGGML_MUSA=ON + time cmake --build build --config Release -j $(nproc) diff --git a/.github/workflows/build-opencl.yml b/.github/workflows/build-opencl.yml index fccb06b8821..c6c21960ad6 100644 --- a/.github/workflows/build-opencl.yml +++ b/.github/workflows/build-opencl.yml @@ -36,7 +36,7 @@ env: jobs: - windows-latest-opencl-adreno: + windows-2025-opencl-adreno: runs-on: windows-2025 steps: @@ -47,7 +47,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: windows-latest-llvm-arm64-opencl-adreno + key: windows-2025-llvm-arm64-opencl-adreno variant: ccache evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/build-openvino.yml b/.github/workflows/build-openvino.yml index 47e04869ca9..cf2c7b6e7f4 100644 --- a/.github/workflows/build-openvino.yml +++ b/.github/workflows/build-openvino.yml @@ -67,7 +67,7 @@ jobs: if: runner.environment == 'github-hosted' uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-24-openvino-${{ matrix.variant }}-no-preset-v1 + key: ubuntu-24.04-openvino-${{ matrix.variant }}-no-preset-v1 evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/build-vulkan.yml b/.github/workflows/build-vulkan.yml index b44f08c6e97..eef3f579dca 100644 --- a/.github/workflows/build-vulkan.yml +++ b/.github/workflows/build-vulkan.yml @@ -36,7 +36,54 @@ env: LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: - ubuntu-24-vulkan-llvmpipe: + ubuntu: + strategy: + matrix: + include: + - build: 'x64' + os: ubuntu-24.04 + - build: 'arm64' + os: ubuntu-24.04-arm + + runs-on: ${{ matrix.os }} + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.21 + with: + key: ${{ matrix.os }}-vulkan + variant: ccache + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install -y gcc-14 g++-14 build-essential glslc libvulkan-dev spirv-headers libssl-dev ninja-build + echo "CC=gcc-14" >> "$GITHUB_ENV" + echo "CXX=g++-14" >> "$GITHUB_ENV" + + - name: Configure + id: cmake_configure + run: | + cmake -B build \ + -G "Ninja" \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DGGML_BACKEND_DL=ON \ + -DGGML_CPU_ALL_VARIANTS=ON \ + -DGGML_VULKAN=ON + + - name: Build + id: cmake_build + run: | + time cmake --build build -j $(nproc) + + ubuntu-llvmpipe: runs-on: ubuntu-24.04 steps: @@ -47,7 +94,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-24-vulkan-llvmpipe + key: ubuntu-24.04-vulkan-llvmpipe evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/build-webgpu.yml b/.github/workflows/build-webgpu.yml index c7056358cc1..f8056676cc1 100644 --- a/.github/workflows/build-webgpu.yml +++ b/.github/workflows/build-webgpu.yml @@ -35,7 +35,7 @@ env: LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: - macos-latest-webgpu: + macos: runs-on: macos-latest steps: @@ -76,7 +76,7 @@ jobs: cd build ctest -L main --verbose --timeout 900 - ubuntu-24-webgpu: + ubuntu: runs-on: ubuntu-24.04 steps: @@ -87,7 +87,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-24-webgpu + key: ubuntu-24.04-webgpu evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -129,8 +129,16 @@ jobs: # test-backend-ops is too slow on llvmpipe, skip it ctest -L main -E test-backend-ops --verbose --timeout 900 - ubuntu-24-webgpu-wasm: - runs-on: ${{ 'ubuntu-24.04-arm' || 'ubuntu-24.04' }} + ubuntu-wasm: + strategy: + matrix: + include: + - build: 'x64' + os: ubuntu-24.04 + - build: 'arm64' + os: ubuntu-24.04-arm + + runs-on: ${{ matrix.os }} steps: - name: Clone @@ -140,7 +148,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-24-webgpu-wasm + key: ${{ matrix.os }}-webgpu-wasm evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1deab2315e4..3f7d6e0e091 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -79,7 +79,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-cpu-${{ matrix.build }} + key: ${{ matrix.os }}-cpu evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -131,46 +131,7 @@ jobs: ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf ./bin/llama-completion -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256 - ubuntu-24-vulkan: - strategy: - matrix: - include: - - build: 'x64' - os: ubuntu-24.04 - - build: 'arm64' - os: ubuntu-24.04-arm - - runs-on: ${{ matrix.os }} - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: Dependencies - id: depends - run: | - sudo apt-get update - sudo apt-get install -y gcc-14 g++-14 build-essential glslc libvulkan-dev spirv-headers libssl-dev ninja-build - echo "CC=gcc-14" >> "$GITHUB_ENV" - echo "CXX=g++-14" >> "$GITHUB_ENV" - - - name: Configure - id: cmake_configure - run: | - cmake -B build \ - -G "Ninja" \ - -DCMAKE_BUILD_TYPE=RelWithDebInfo \ - -DGGML_BACKEND_DL=ON \ - -DGGML_CPU_ALL_VARIANTS=ON \ - -DGGML_VULKAN=ON - - - name: Build - id: cmake_build - run: | - time cmake --build build -j $(nproc) - - windows-latest: + windows-cpu: runs-on: windows-2025 env: @@ -202,7 +163,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: windows-latest-${{ matrix.build }} + key: windows-2025-${{ matrix.build }} variant: ccache evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -268,88 +229,3 @@ jobs: # cd build # $env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1 # & $sde -future -- ctest -L main -C Release --verbose --timeout 900 - - ubuntu-latest-cuda: - runs-on: ubuntu-latest - container: nvidia/cuda:12.6.2-devel-ubuntu24.04 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: Install dependencies - env: - DEBIAN_FRONTEND: noninteractive - run: | - apt update - apt install -y cmake build-essential ninja-build libgomp1 git libssl-dev - - - name: ccache - uses: ggml-org/ccache-action@v1.2.21 - with: - key: ubuntu-latest-cuda - evict-old-files: 1d - save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - - - name: Build with CMake - # TODO: Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled within CTK and that CTK version is used in this project - run: | - cmake -S . -B build -G Ninja \ - -DLLAMA_FATAL_WARNINGS=ON \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_CUDA_ARCHITECTURES=89-real \ - -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \ - -DGGML_NATIVE=OFF \ - -DGGML_CUDA=ON \ - -DGGML_CUDA_CUB_3DOT2=ON - cmake --build build - - windows-2022-cuda: - runs-on: windows-2022 - - strategy: - matrix: - cuda: ['12.4'] - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: Install ccache - uses: ggml-org/ccache-action@v1.2.21 - with: - key: windows-cuda-${{ matrix.cuda }} - variant: ccache - evict-old-files: 1d - save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - - - name: Install Cuda Toolkit - uses: ./.github/actions/windows-setup-cuda - with: - cuda_version: ${{ matrix.cuda }} - - - name: Install Ninja - id: install_ninja - run: | - choco install ninja - - - name: Build - id: cmake_build - shell: cmd - # TODO: Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled within CTK and that CTK version is used in this project - run: | - call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 - cmake -S . -B build -G "Ninja Multi-Config" ^ - -DLLAMA_BUILD_SERVER=ON ^ - -DLLAMA_BUILD_BORINGSSL=ON ^ - -DGGML_NATIVE=OFF ^ - -DGGML_BACKEND_DL=ON ^ - -DGGML_CPU_ALL_VARIANTS=ON ^ - -DGGML_CUDA=ON ^ - -DGGML_RPC=ON ^ - -DGGML_CUDA_CUB_3DOT2=ON - set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1 - cmake --build build --config Release -j %NINJA_JOBS% -t ggml - cmake --build build --config Release diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 20b2dc915a1..6607a603d75 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -100,7 +100,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: macos-latest-${{ matrix.arch }} + key: ${{ matrix.os }}-${{ matrix.arch }} evict-old-files: 1d - name: Build @@ -165,7 +165,7 @@ jobs: if: ${{ matrix.build != 's390x' }} uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-cpu-${{ matrix.build }} + key: ${{ matrix.os }}-cpu evict-old-files: 1d - name: Dependencies @@ -241,7 +241,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-vulkan-${{ matrix.build }} + key: ${{ matrix.os }}-vulkan evict-old-files: 1d - name: Dependencies @@ -402,7 +402,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-24-openvino-release-no-preset-v1 + key: ubuntu-24.04-openvino-release-no-preset-v1 evict-old-files: 1d - name: Dependencies @@ -485,7 +485,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: windows-latest-cpu-${{ matrix.arch }} + key: windows-2025-cpu-${{ matrix.arch }} variant: ccache evict-old-files: 1d @@ -556,7 +556,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: windows-latest-${{ matrix.backend }}-${{ matrix.arch }} + key: windows-2025-${{ matrix.backend }}-${{ matrix.arch }} variant: ccache evict-old-files: 1d @@ -633,10 +633,10 @@ jobs: cache: "npm" cache-dependency-path: "tools/ui/package-lock.json" - - name: Install ccache + - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: windows-cuda-${{ matrix.cuda }} + key: windows-2022-cuda-${{ matrix.cuda }} variant: ccache evict-old-files: 1d @@ -744,7 +744,7 @@ jobs: # - name: ccache # uses: ggml-org/ccache-action@v1.2.21 # with: -# key: windows-latest-sycl +# key: windows-2022-sycl # variant: ccache # evict-old-files: 1d # @@ -866,7 +866,7 @@ jobs: # - name: ccache # uses: ggml-org/ccache-action@v1.2.21 # with: -# key: ubuntu-24-sycl-${{ matrix.build }} +# key: ubuntu-24.04-sycl # evict-old-files: 1d # save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} # @@ -936,7 +936,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-rocm-${{ matrix.ROCM_VERSION }}-${{ matrix.build }} + key: ubuntu-22.04-rocm-${{ matrix.ROCM_VERSION }} evict-old-files: 1d - name: Dependencies @@ -1058,7 +1058,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: windows-latest-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}-x64 + key: windows-2022-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}-x64 evict-old-files: 1d - name: Install ROCm diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml index b30e3337044..3b0049f5b25 100644 --- a/.github/workflows/server.yml +++ b/.github/workflows/server.yml @@ -55,7 +55,7 @@ concurrency: jobs: ubuntu: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 name: ubuntu (${{ matrix.wf_name }}) strategy: @@ -96,7 +96,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: server-ubuntu-default + key: ubuntu-24.04-server evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -144,7 +144,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: server-windows-default + key: windows-2025-server-x64-llvm evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} From c0381e115217f578f2f8b171de6710ab321a55c9 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 27 May 2026 20:48:48 +0300 Subject: [PATCH 02/13] ci : rename workflow --- .github/workflows/{build-cuda.yml => build-cuda-ubuntu.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{build-cuda.yml => build-cuda-ubuntu.yml} (100%) diff --git a/.github/workflows/build-cuda.yml b/.github/workflows/build-cuda-ubuntu.yml similarity index 100% rename from .github/workflows/build-cuda.yml rename to .github/workflows/build-cuda-ubuntu.yml From acf7181e664dfcb2e3074c8b10898f1bcd6dd3d0 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 27 May 2026 21:10:25 +0300 Subject: [PATCH 03/13] ci : prefix cache names with workflow name --- .github/workflows/build-android.yml | 8 ++++---- .github/workflows/build-apple.yml | 17 ++++++++++++----- .github/workflows/build-cuda-ubuntu.yml | 10 +++++----- .github/workflows/build-cuda-windows.yml | 4 ++-- .github/workflows/build-msys.yml | 2 +- .github/workflows/build-opencl.yml | 3 +-- .github/workflows/build-openvino.yml | 2 +- .github/workflows/build-riscv.yml | 4 ++-- .github/workflows/build-rpc.yml | 1 - .github/workflows/build-sanitize.yml | 13 ------------- .github/workflows/build-self-hosted.yml | 8 -------- .github/workflows/build-sycl.yml | 4 ++-- .github/workflows/build-vulkan.yml | 4 ++-- .github/workflows/build-webgpu.yml | 6 +++--- .github/workflows/build.yml | 14 +++++++------- .github/workflows/hip-quality-check.yml | 2 +- .github/workflows/release.yml | 24 ++++++++++++------------ .github/workflows/server.yml | 4 ++-- 18 files changed, 57 insertions(+), 73 deletions(-) diff --git a/.github/workflows/build-android.yml b/.github/workflows/build-android.yml index 713ccdc7ffb..0a53cdfb222 100644 --- a/.github/workflows/build-android.yml +++ b/.github/workflows/build-android.yml @@ -32,7 +32,7 @@ env: LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: - android: + default: runs-on: ubuntu-latest steps: @@ -58,7 +58,7 @@ jobs: cd examples/llama.android ./gradlew build --no-daemon - android-ndk: + ndk: runs-on: ubuntu-latest container: image: 'ghcr.io/snapdragon-toolchain/arm64-android:v0.3' @@ -92,7 +92,7 @@ jobs: name: llama-cpp-android-arm64-cpu path: pkg-adb/llama.cpp - android-arm64: + arm64: runs-on: ubuntu-latest env: @@ -106,7 +106,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: android-arm64 + key: android-ubuntu-arm64 evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/build-apple.yml b/.github/workflows/build-apple.yml index d2c99d0d5af..2c354449287 100644 --- a/.github/workflows/build-apple.yml +++ b/.github/workflows/build-apple.yml @@ -48,7 +48,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: macos-latest-arm64 + key: apple-macos-latest-arm64 evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -84,7 +84,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: macos-latest-x64 + key: apple-macos-latest-x64 evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -120,7 +120,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: macos-latest-ios + key: apple-macos-latest-ios evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -200,7 +200,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: macos-latest-tvos + key: apple-macos-latest-tvos evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -230,6 +230,13 @@ jobs: id: checkout uses: actions/checkout@v6 + - name: ccache + uses: ggml-org/ccache-action@v1.2.21 + with: + key: apple-macos-latest-visionos + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + - name: Build id: cmake_build run: | @@ -264,7 +271,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: macos-latest-swift + key: apple-macos-latest-swift evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/build-cuda-ubuntu.yml b/.github/workflows/build-cuda-ubuntu.yml index e5e3d09965d..6271b22cbd2 100644 --- a/.github/workflows/build-cuda-ubuntu.yml +++ b/.github/workflows/build-cuda-ubuntu.yml @@ -6,7 +6,7 @@ on: branches: - master paths: [ - '.github/workflows/build-hip.yml', + '.github/workflows/build-cuda-ubuntu.yml', '**/CMakeLists.txt', '**/.cmake', '**/*.h', @@ -20,7 +20,7 @@ on: pull_request: types: [opened, synchronize, reopened] paths: [ - '.github/workflows/build-hip.yml', + '.github/workflows/build-cuda-ubuntu.yml', 'ggml/src/ggml-cuda/**' ] @@ -55,7 +55,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-24.04-cuda + key: cuda-ubuntu-24.04-cuda evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -90,7 +90,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-22.04-hip + key: cuda-ubuntu-22.04-hip evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -122,7 +122,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-22.04-musa + key: cuda-ubuntu-22.04-musa evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/build-cuda-windows.yml b/.github/workflows/build-cuda-windows.yml index 052cdf6d967..28b2a4f688a 100644 --- a/.github/workflows/build-cuda-windows.yml +++ b/.github/workflows/build-cuda-windows.yml @@ -34,7 +34,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: windows-2022-cuda-${{ matrix.cuda }} + key: cuda-windows-2022-x64-cuda-${{ matrix.cuda }} variant: ccache evict-old-files: 1d save: true # always save the cache since we are running this manually @@ -122,7 +122,7 @@ jobs: - name: Install ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: windows-2022-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}-x64 + key: cuda-windows-2022-x64-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}-x64 evict-old-files: 1d save: true # always save the cache since we are running this manually #save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/build-msys.yml b/.github/workflows/build-msys.yml index 8214f2b8da8..c2633c151a5 100644 --- a/.github/workflows/build-msys.yml +++ b/.github/workflows/build-msys.yml @@ -37,7 +37,7 @@ jobs: #- name: ccache # uses: ggml-org/ccache-action@v1.2.16 # with: - # key: windows-msys2 + # key: msys-windows-2025-x64 # variant: ccache # evict-old-files: 1d # save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/build-opencl.yml b/.github/workflows/build-opencl.yml index c6c21960ad6..251b1f8d593 100644 --- a/.github/workflows/build-opencl.yml +++ b/.github/workflows/build-opencl.yml @@ -35,7 +35,6 @@ env: LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: - windows-2025-opencl-adreno: runs-on: windows-2025 @@ -47,7 +46,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: windows-2025-llvm-arm64-opencl-adreno + key: opencl-windows-2025-x64 variant: ccache evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/build-openvino.yml b/.github/workflows/build-openvino.yml index cf2c7b6e7f4..35a955f75ce 100644 --- a/.github/workflows/build-openvino.yml +++ b/.github/workflows/build-openvino.yml @@ -67,7 +67,7 @@ jobs: if: runner.environment == 'github-hosted' uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-24.04-openvino-${{ matrix.variant }}-no-preset-v1 + key: openvino-ubuntu-24.04-${{ matrix.variant }}-no-preset-v1 evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/build-riscv.yml b/.github/workflows/build-riscv.yml index c12aaa61fec..70615378b5e 100644 --- a/.github/workflows/build-riscv.yml +++ b/.github/workflows/build-riscv.yml @@ -69,7 +69,7 @@ jobs: #- name: ccache # uses: ggml-org/ccache-action@afde29e5b5422e5da23cb1f639e8baecadeadfc3 # https://github.com/ggml-org/ccache-action/pull/1 # with: - # key: ubuntu-cpu-riscv64-native + # key: riscv-ubuntu-native # evict-old-files: 1d # save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -139,7 +139,7 @@ jobs: #- name: ccache # uses: ggml-org/ccache-action@afde29e5b5422e5da23cb1f639e8baecadeadfc3 # https://github.com/ggml-org/ccache-action/pull/1 # with: - # key: ubuntu-riscv64-native-sanitizer-${{ matrix.sanitizer }}-${{ matrix.build_type }} + # key: riscv-ubuntu-native-sanitizer-${{ matrix.sanitizer }}-${{ matrix.build_type }} # evict-old-files: 1d # save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/build-rpc.yml b/.github/workflows/build-rpc.yml index c1ff98770e9..c060b649b82 100644 --- a/.github/workflows/build-rpc.yml +++ b/.github/workflows/build-rpc.yml @@ -34,7 +34,6 @@ env: LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: - ubuntu-latest-rpc: runs-on: ubuntu-latest diff --git a/.github/workflows/build-sanitize.yml b/.github/workflows/build-sanitize.yml index 29f7a2922b2..e242abcfd3c 100644 --- a/.github/workflows/build-sanitize.yml +++ b/.github/workflows/build-sanitize.yml @@ -41,19 +41,6 @@ jobs: id: checkout uses: actions/checkout@v6 - #- name: ccache - # uses: ggml-org/ccache-action@v1.2.21 - # with: - # key: ubuntu-latest-sanitizer-${{ matrix.sanitizer }} - # evict-old-files: 1d - # save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - - #- name: Dependencies - # id: depends - # run: | - # sudo apt-get update - # sudo apt-get install build-essential libssl-dev - # with UNDEFINED sanitizer, we have to build in Debug to avoid GCC 13 false-positive warnings - name: Build (undefined) id: cmake_build_undefined diff --git a/.github/workflows/build-self-hosted.yml b/.github/workflows/build-self-hosted.yml index 9ff470ea3b7..381cd3ce1b1 100644 --- a/.github/workflows/build-self-hosted.yml +++ b/.github/workflows/build-self-hosted.yml @@ -396,14 +396,6 @@ jobs: sudo apt-get update sudo apt-get install -y cmake - # note: sparing some ccache since these jobs run on dedicated runners that are not part of the organitzation - #- name: ccache - # uses: ggml-org/ccache-action@v1.2.21 - # with: - # key: arm64-cpu-kleidiai-graviton4 - # evict-old-files: 1d - # save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - - name: Test id: ggml-ci run: | diff --git a/.github/workflows/build-sycl.yml b/.github/workflows/build-sycl.yml index b0697f2f2b9..ef377c8186f 100644 --- a/.github/workflows/build-sycl.yml +++ b/.github/workflows/build-sycl.yml @@ -88,7 +88,7 @@ jobs: # - name: ccache # uses: ggml-org/ccache-action@v1.2.21 # with: -# key: ubuntu-24-sycl-${{ matrix.build }} +# key: sycl-ubuntu-24-${{ matrix.build }} # evict-old-files: 1d # save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} # @@ -150,7 +150,7 @@ jobs: # - name: ccache # uses: ggml-org/ccache-action@v1.2.21 # with: -# key: windows-latest-sycl +# key: sycl-windows-latest # variant: ccache # evict-old-files: 1d # save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/build-vulkan.yml b/.github/workflows/build-vulkan.yml index eef3f579dca..e6eab8fd0aa 100644 --- a/.github/workflows/build-vulkan.yml +++ b/.github/workflows/build-vulkan.yml @@ -55,7 +55,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ${{ matrix.os }}-vulkan + key: vulkan-${{ matrix.os }} variant: ccache evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -94,7 +94,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-24.04-vulkan-llvmpipe + key: vulkan-ubuntu-24.04-llvmpipe evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/build-webgpu.yml b/.github/workflows/build-webgpu.yml index f8056676cc1..1974511a922 100644 --- a/.github/workflows/build-webgpu.yml +++ b/.github/workflows/build-webgpu.yml @@ -46,7 +46,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: macos-latest-webgpu + key: webgpu-macos-latest evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -87,7 +87,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-24.04-webgpu + key: webgpu-ubuntu-24.04 evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -148,7 +148,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ${{ matrix.os }}-webgpu-wasm + key: webgpu-${{ matrix.os }}-wasm evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3f7d6e0e091..ee0f8b88844 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,4 +1,4 @@ -name: CI +name: CI (cpu) on: workflow_dispatch: # allows manual triggering @@ -6,7 +6,7 @@ on: branches: - master paths: [ - '.github/workflows/build.yml', + '.github/workflows/build-cpu.yml', '.github/workflows/build-cmake-pkg.yml', '**/CMakeLists.txt', '**/.cmake', @@ -27,7 +27,7 @@ on: pull_request: types: [opened, synchronize, reopened] paths: [ - '.github/workflows/build.yml', + '.github/workflows/build-cpu.yml', '.github/workflows/build-cmake-pkg.yml', '**/CMakeLists.txt', '**/.cmake', @@ -60,7 +60,7 @@ jobs: build-cmake-pkg: uses: ./.github/workflows/build-cmake-pkg.yml - ubuntu-cpu: + ubuntu: strategy: matrix: include: @@ -79,7 +79,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ${{ matrix.os }}-cpu + key: cpu-${{ matrix.os }} evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -131,7 +131,7 @@ jobs: ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf ./bin/llama-completion -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256 - windows-cpu: + windows: runs-on: windows-2025 env: @@ -163,7 +163,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: windows-2025-${{ matrix.build }} + key: cpu-windows-2025-${{ matrix.build }} variant: ccache evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/hip-quality-check.yml b/.github/workflows/hip-quality-check.yml index 5d03b177293..14b9f41a6ec 100644 --- a/.github/workflows/hip-quality-check.yml +++ b/.github/workflows/hip-quality-check.yml @@ -50,7 +50,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-22-hip-quality-check + key: hip-quality-check-ubuntu-22.04 evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 6607a603d75..816e51ea39a 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -100,7 +100,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ${{ matrix.os }}-${{ matrix.arch }} + key: release-${{ matrix.os }}-${{ matrix.arch }} evict-old-files: 1d - name: Build @@ -165,7 +165,7 @@ jobs: if: ${{ matrix.build != 's390x' }} uses: ggml-org/ccache-action@v1.2.21 with: - key: ${{ matrix.os }}-cpu + key: release-${{ matrix.os }}-cpu evict-old-files: 1d - name: Dependencies @@ -241,7 +241,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ${{ matrix.os }}-vulkan + key: release-${{ matrix.os }}-vulkan evict-old-files: 1d - name: Dependencies @@ -314,7 +314,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: android-arm64 + key: release-android-arm64 evict-old-files: 1d - name: Set up JDK @@ -402,7 +402,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-24.04-openvino-release-no-preset-v1 + key: release-ubuntu-24.04-openvino-release-no-preset-v1 evict-old-files: 1d - name: Dependencies @@ -485,7 +485,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: windows-2025-cpu-${{ matrix.arch }} + key: release-windows-2025-${{ matrix.arch }}-cpu variant: ccache evict-old-files: 1d @@ -556,7 +556,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: windows-2025-${{ matrix.backend }}-${{ matrix.arch }} + key: release-windows-2025-${{ matrix.arch }}-${{ matrix.backend }} variant: ccache evict-old-files: 1d @@ -636,7 +636,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: windows-2022-cuda-${{ matrix.cuda }} + key: release-windows-2022-x64-cuda-${{ matrix.cuda }} variant: ccache evict-old-files: 1d @@ -744,7 +744,7 @@ jobs: # - name: ccache # uses: ggml-org/ccache-action@v1.2.21 # with: -# key: windows-2022-sycl +# key: release-windows-2022-x64-sycl # variant: ccache # evict-old-files: 1d # @@ -866,7 +866,7 @@ jobs: # - name: ccache # uses: ggml-org/ccache-action@v1.2.21 # with: -# key: ubuntu-24.04-sycl +# key: release-ubuntu-24.04-sycl # evict-old-files: 1d # save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} # @@ -936,7 +936,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-22.04-rocm-${{ matrix.ROCM_VERSION }} + key: release-ubuntu-22.04-rocm-${{ matrix.ROCM_VERSION }} evict-old-files: 1d - name: Dependencies @@ -1058,7 +1058,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: windows-2022-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}-x64 + key: release-windows-2022-x64-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }} evict-old-files: 1d - name: Install ROCm diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml index 3b0049f5b25..ffdc7c17dac 100644 --- a/.github/workflows/server.yml +++ b/.github/workflows/server.yml @@ -96,7 +96,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-24.04-server + key: server-ubuntu-24.04-x64 evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -144,7 +144,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: windows-2025-server-x64-llvm + key: server-windows-2025-x64 evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} From a9cb3cfdb000628ce751b2a8b455a6615bf48d4a Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 27 May 2026 21:10:46 +0300 Subject: [PATCH 04/13] ci : rename build.yml -> build-cpu.yml --- .github/workflows/{build.yml => build-cpu.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{build.yml => build-cpu.yml} (100%) diff --git a/.github/workflows/build.yml b/.github/workflows/build-cpu.yml similarity index 100% rename from .github/workflows/build.yml rename to .github/workflows/build-cpu.yml From f9934370b1eb4441c9659e44b9cd13598ceb70bd Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 27 May 2026 21:12:34 +0300 Subject: [PATCH 05/13] ci : cache keys --- .github/workflows/build-cuda-windows.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-cuda-windows.yml b/.github/workflows/build-cuda-windows.yml index 28b2a4f688a..4a6e57a5625 100644 --- a/.github/workflows/build-cuda-windows.yml +++ b/.github/workflows/build-cuda-windows.yml @@ -34,7 +34,8 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: cuda-windows-2022-x64-cuda-${{ matrix.cuda }} + # note: this cache key is the same as in release.yml ! + key: release-windows-2022-x64-cuda-${{ matrix.cuda }} variant: ccache evict-old-files: 1d save: true # always save the cache since we are running this manually @@ -122,7 +123,8 @@ jobs: - name: Install ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: cuda-windows-2022-x64-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}-x64 + # note: this cache key is the same as in release.yml ! + key: release-windows-2022-x64-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }} evict-old-files: 1d save: true # always save the cache since we are running this manually #save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} From 8eaeb7df1f0b8598fbd1c324b30e309339a7573a Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 27 May 2026 21:16:13 +0300 Subject: [PATCH 06/13] ci : fix windows cuda/hip concurrency of release workflow --- .github/workflows/release.yml | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 816e51ea39a..c8055fab457 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -27,10 +27,6 @@ on: '**/*.glsl' ] -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} - cancel-in-progress: true - env: BRANCH_NAME: ${{ github.head_ref || github.ref_name }} CMAKE_ARGS: "-DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=ON -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON" @@ -615,6 +611,11 @@ jobs: needs: [check_release] if: ${{ needs.check_release.outputs.should_release == 'true' }} + # note: run these jobs one at a time for better cache reuse + concurrency: + group: ${{ github.workflow }}-windows-cuda + queue: max + runs-on: windows-2022 strategy: @@ -1018,6 +1019,11 @@ jobs: needs: [check_release] if: ${{ needs.check_release.outputs.should_release == 'true' }} + # note: run these jobs one at a time for better cache reuse + concurrency: + group: ${{ github.workflow }}-windows-hip + queue: max + runs-on: windows-2022 env: From 250f3a30dcfd41b5d2aacfc1132e4d5305c8af66 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 27 May 2026 21:22:03 +0300 Subject: [PATCH 07/13] ci : fix apple cache names --- .github/workflows/build-apple.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build-apple.yml b/.github/workflows/build-apple.yml index 2c354449287..7f2e29fb6d0 100644 --- a/.github/workflows/build-apple.yml +++ b/.github/workflows/build-apple.yml @@ -48,7 +48,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: apple-macos-latest-arm64 + key: apple-arm64 evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -84,7 +84,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: apple-macos-latest-x64 + key: apple-x64 evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -120,7 +120,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: apple-macos-latest-ios + key: apple-ios evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -200,7 +200,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: apple-macos-latest-tvos + key: apple-tvos evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -233,7 +233,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: apple-macos-latest-visionos + key: apple-visionos evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -271,7 +271,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: apple-macos-latest-swift + key: apple-swift evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} From 1a865fded84cadcf229cdbfb4c6cae936a7bbbf3 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 27 May 2026 21:27:30 +0300 Subject: [PATCH 08/13] ci : add TODOs --- .github/workflows/build-apple.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/build-apple.yml b/.github/workflows/build-apple.yml index 7f2e29fb6d0..54a3ed8597a 100644 --- a/.github/workflows/build-apple.yml +++ b/.github/workflows/build-apple.yml @@ -117,6 +117,7 @@ jobs: id: checkout uses: actions/checkout@v6 + # TODO: this likely does not do anything - if yes, remove it - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: @@ -197,6 +198,7 @@ jobs: id: checkout uses: actions/checkout@v6 + # TODO: this likely does not do anything - if yes, remove it - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: @@ -230,6 +232,7 @@ jobs: id: checkout uses: actions/checkout@v6 + # TODO: this likely does not do anything - if yes, remove it - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: @@ -268,6 +271,7 @@ jobs: id: checkout uses: actions/checkout@v6 + # TODO: this likely does not do anything - if yes, remove it - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: From c9c97d409c82de385f4b363d6cffd2149955a0ed Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 27 May 2026 22:33:56 +0300 Subject: [PATCH 09/13] cont : keep just the last cache [no ci] --- .github/workflows/build-cuda-windows.yml | 8 ++++---- .github/workflows/release.yml | 6 ++++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build-cuda-windows.yml b/.github/workflows/build-cuda-windows.yml index 4a6e57a5625..adbe9885e10 100644 --- a/.github/workflows/build-cuda-windows.yml +++ b/.github/workflows/build-cuda-windows.yml @@ -34,10 +34,10 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - # note: this cache key is the same as in release.yml ! key: release-windows-2022-x64-cuda-${{ matrix.cuda }} + max-size: "1000MB" variant: ccache - evict-old-files: 1d + evict-old-files: job # keep only the latest cache save: true # always save the cache since we are running this manually #save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -123,9 +123,9 @@ jobs: - name: Install ccache uses: ggml-org/ccache-action@v1.2.21 with: - # note: this cache key is the same as in release.yml ! key: release-windows-2022-x64-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }} - evict-old-files: 1d + max-size: "1000MB" + evict-old-files: job # keep only the latest cache save: true # always save the cache since we are running this manually #save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index c8055fab457..f04080224fd 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -638,8 +638,9 @@ jobs: uses: ggml-org/ccache-action@v1.2.21 with: key: release-windows-2022-x64-cuda-${{ matrix.cuda }} + max-size: "1000MB" variant: ccache - evict-old-files: 1d + evict-old-files: job # keep only the latest cache - name: Install Cuda Toolkit uses: ./.github/actions/windows-setup-cuda @@ -1065,7 +1066,8 @@ jobs: uses: ggml-org/ccache-action@v1.2.21 with: key: release-windows-2022-x64-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }} - evict-old-files: 1d + max-size: "1000MB" + evict-old-files: job # keep only the latest cache - name: Install ROCm if: steps.cache-rocm.outputs.cache-hit != 'true' From d645f84580a6ac8f20a19f57211d47eeb702bee4 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 28 May 2026 07:51:33 +0300 Subject: [PATCH 10/13] ci : update release concurrency to queue --- .github/workflows/build-android.yml | 18 ++++--- .github/workflows/build-cuda-windows.yml | 20 +++----- .github/workflows/release.yml | 61 +++++++++++------------- 3 files changed, 46 insertions(+), 53 deletions(-) diff --git a/.github/workflows/build-android.yml b/.github/workflows/build-android.yml index 0a53cdfb222..a05248e1298 100644 --- a/.github/workflows/build-android.yml +++ b/.github/workflows/build-android.yml @@ -103,12 +103,18 @@ jobs: id: checkout uses: actions/checkout@v6 - - name: ccache - uses: ggml-org/ccache-action@v1.2.21 - with: - key: android-ubuntu-arm64 - evict-old-files: 1d - save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + # note : disabled to spare some cache space (https://github.com/ggml-org/llama.cpp/pull/23789) + # for some reason, the ccache does not improve the build time in this case + # example: + # cache off: https://github.com/ggerganov/tmp2/actions/runs/26534713799/job/78160400831 + # cache on: https://github.com/ggerganov/tmp2/actions/runs/26534713799/job/78224189394 + # + #- name: ccache + # uses: ggml-org/ccache-action@v1.2.21 + # with: + # key: android-ubuntu-arm64 + # evict-old-files: 1d + # save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - name: Set up JDK uses: actions/setup-java@v5 diff --git a/.github/workflows/build-cuda-windows.yml b/.github/workflows/build-cuda-windows.yml index adbe9885e10..4665a4377d0 100644 --- a/.github/workflows/build-cuda-windows.yml +++ b/.github/workflows/build-cuda-windows.yml @@ -2,14 +2,15 @@ name: CI (CUDA, windows) # TODO: this workflow is only triggered manually because it is very heavy on the CI # when we provision dedicated windows runners, we can enable it for pushes too -# note: running this workflow manually will populate the ccache for the builds +# note: running this workflow manually will populate the ccache for the release builds # this can be used to speed up the release builds on: workflow_dispatch: # allows manual triggering +# note: this will run in queue with the release workflow concurrency: - group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} - cancel-in-progress: true + group: release + queue: max env: GGML_NLOOP: 3 @@ -35,11 +36,7 @@ jobs: uses: ggml-org/ccache-action@v1.2.21 with: key: release-windows-2022-x64-cuda-${{ matrix.cuda }} - max-size: "1000MB" - variant: ccache - evict-old-files: job # keep only the latest cache - save: true # always save the cache since we are running this manually - #save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + append-timestamp: false # note: use this only with non-concurrent jobs! - name: Install Cuda Toolkit uses: ./.github/actions/windows-setup-cuda @@ -120,14 +117,11 @@ jobs: } & $clangPath.FullName --version - - name: Install ccache + - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: key: release-windows-2022-x64-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }} - max-size: "1000MB" - evict-old-files: job # keep only the latest cache - save: true # always save the cache since we are running this manually - #save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + append-timestamp: false # note: use this only with non-concurrent jobs! - name: Build id: cmake_build diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f04080224fd..03abf7bc311 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -31,8 +31,12 @@ env: BRANCH_NAME: ${{ github.head_ref || github.ref_name }} CMAKE_ARGS: "-DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=ON -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON" -jobs: +# note: run these jobs one at a time for better cache reuse +concurrency: + group: release + queue: max +jobs: check_release: runs-on: [self-hosted, fast] @@ -97,7 +101,7 @@ jobs: uses: ggml-org/ccache-action@v1.2.21 with: key: release-${{ matrix.os }}-${{ matrix.arch }} - evict-old-files: 1d + append-timestamp: false # note: use this only with non-concurrent jobs! - name: Build id: cmake_build @@ -162,7 +166,7 @@ jobs: uses: ggml-org/ccache-action@v1.2.21 with: key: release-${{ matrix.os }}-cpu - evict-old-files: 1d + append-timestamp: false # note: use this only with non-concurrent jobs! - name: Dependencies id: depends @@ -238,7 +242,7 @@ jobs: uses: ggml-org/ccache-action@v1.2.21 with: key: release-${{ matrix.os }}-vulkan - evict-old-files: 1d + append-timestamp: false # note: use this only with non-concurrent jobs! - name: Dependencies id: depends @@ -307,11 +311,17 @@ jobs: cache: "npm" cache-dependency-path: "tools/ui/package-lock.json" - - name: ccache - uses: ggml-org/ccache-action@v1.2.21 - with: - key: release-android-arm64 - evict-old-files: 1d + # note : disabled to spare some cache space (https://github.com/ggml-org/llama.cpp/pull/23789) + # for some reason, the ccache does not improve the build time in this case + # example: + # cache off: https://github.com/ggerganov/tmp2/actions/runs/26534713799/job/78160400831 + # cache on: https://github.com/ggerganov/tmp2/actions/runs/26534713799/job/78224189394 + # + #- name: ccache + # uses: ggml-org/ccache-action@v1.2.21 + # with: + # key: release-android-arm64 + # append-timestamp: false # note: use this only with non-concurrent jobs! - name: Set up JDK uses: actions/setup-java@v5 @@ -399,7 +409,7 @@ jobs: uses: ggml-org/ccache-action@v1.2.21 with: key: release-ubuntu-24.04-openvino-release-no-preset-v1 - evict-old-files: 1d + append-timestamp: false # note: use this only with non-concurrent jobs! - name: Dependencies run: | @@ -482,8 +492,7 @@ jobs: uses: ggml-org/ccache-action@v1.2.21 with: key: release-windows-2025-${{ matrix.arch }}-cpu - variant: ccache - evict-old-files: 1d + append-timestamp: false # note: use this only with non-concurrent jobs! - name: Install Ninja run: | @@ -553,8 +562,7 @@ jobs: uses: ggml-org/ccache-action@v1.2.21 with: key: release-windows-2025-${{ matrix.arch }}-${{ matrix.backend }} - variant: ccache - evict-old-files: 1d + append-timestamp: false # note: use this only with non-concurrent jobs! - name: Install Vulkan SDK id: get_vulkan @@ -611,11 +619,6 @@ jobs: needs: [check_release] if: ${{ needs.check_release.outputs.should_release == 'true' }} - # note: run these jobs one at a time for better cache reuse - concurrency: - group: ${{ github.workflow }}-windows-cuda - queue: max - runs-on: windows-2022 strategy: @@ -638,9 +641,7 @@ jobs: uses: ggml-org/ccache-action@v1.2.21 with: key: release-windows-2022-x64-cuda-${{ matrix.cuda }} - max-size: "1000MB" - variant: ccache - evict-old-files: job # keep only the latest cache + append-timestamp: false # note: use this only with non-concurrent jobs! - name: Install Cuda Toolkit uses: ./.github/actions/windows-setup-cuda @@ -747,8 +748,7 @@ jobs: # uses: ggml-org/ccache-action@v1.2.21 # with: # key: release-windows-2022-x64-sycl -# variant: ccache -# evict-old-files: 1d +# append-timestamp: false # note: use this only with non-concurrent jobs! # # - name: Build # id: cmake_build @@ -869,8 +869,7 @@ jobs: # uses: ggml-org/ccache-action@v1.2.21 # with: # key: release-ubuntu-24.04-sycl -# evict-old-files: 1d -# save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} +# append-timestamp: false # note: use this only with non-concurrent jobs! # # - name: Build # id: cmake_build @@ -939,7 +938,7 @@ jobs: uses: ggml-org/ccache-action@v1.2.21 with: key: release-ubuntu-22.04-rocm-${{ matrix.ROCM_VERSION }} - evict-old-files: 1d + append-timestamp: false # note: use this only with non-concurrent jobs! - name: Dependencies id: depends @@ -1020,11 +1019,6 @@ jobs: needs: [check_release] if: ${{ needs.check_release.outputs.should_release == 'true' }} - # note: run these jobs one at a time for better cache reuse - concurrency: - group: ${{ github.workflow }}-windows-hip - queue: max - runs-on: windows-2022 env: @@ -1066,8 +1060,7 @@ jobs: uses: ggml-org/ccache-action@v1.2.21 with: key: release-windows-2022-x64-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }} - max-size: "1000MB" - evict-old-files: job # keep only the latest cache + append-timestamp: false # note: use this only with non-concurrent jobs! - name: Install ROCm if: steps.cache-rocm.outputs.cache-hit != 'true' From e206253f37a32953f4c10f231a6692746a5c8e40 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 28 May 2026 08:07:01 +0300 Subject: [PATCH 11/13] ci : move the release trigger to ubuntu-slim --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 03abf7bc311..d3fd6105c43 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -38,7 +38,7 @@ concurrency: jobs: check_release: - runs-on: [self-hosted, fast] + runs-on: ubuntu-slim outputs: should_release: ${{ steps.check.outputs.should_release }} From 7988c6effcf54c9d15a711de52ea2820296ba1eb Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 28 May 2026 08:13:40 +0300 Subject: [PATCH 12/13] ci : hip add TODO --- .github/workflows/build-cuda-windows.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-cuda-windows.yml b/.github/workflows/build-cuda-windows.yml index 4665a4377d0..fe5c984d59d 100644 --- a/.github/workflows/build-cuda-windows.yml +++ b/.github/workflows/build-cuda-windows.yml @@ -120,7 +120,11 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: release-windows-2022-x64-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }} + # TODO: this build does not match the build in release.yml, so we use a different key + # ideally, the builds should match, similar to the CUDA build above so that we would be able + # to populate the ccache for the release with manual runs of this workflow + #key: release-windows-2022-x64-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }} + key: cuda-windows-2022-x64-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }} append-timestamp: false # note: use this only with non-concurrent jobs! - name: Build From 5ecf7b4c3145fb485a589d51381deb88cd747c3f Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 28 May 2026 09:28:49 +0300 Subject: [PATCH 13/13] cont : improve words [no ci] Co-authored-by: Georgi Gerganov --- .github/workflows/build-cuda-windows.yml | 4 ++-- .github/workflows/release.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-cuda-windows.yml b/.github/workflows/build-cuda-windows.yml index fe5c984d59d..631ff4ed26b 100644 --- a/.github/workflows/build-cuda-windows.yml +++ b/.github/workflows/build-cuda-windows.yml @@ -3,7 +3,7 @@ name: CI (CUDA, windows) # TODO: this workflow is only triggered manually because it is very heavy on the CI # when we provision dedicated windows runners, we can enable it for pushes too # note: running this workflow manually will populate the ccache for the release builds -# this can be used to speed up the release builds +# this can be used before merging a PR to speed up the release workflow on: workflow_dispatch: # allows manual triggering @@ -120,7 +120,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - # TODO: this build does not match the build in release.yml, so we use a different key + # TODO: this build does not match the build in release.yml, so we use a different cache key # ideally, the builds should match, similar to the CUDA build above so that we would be able # to populate the ccache for the release with manual runs of this workflow #key: release-windows-2022-x64-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index d3fd6105c43..c3a018425e2 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -31,7 +31,7 @@ env: BRANCH_NAME: ${{ github.head_ref || github.ref_name }} CMAKE_ARGS: "-DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=ON -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON" -# note: run these jobs one at a time for better cache reuse +# note: run this workflow one at a time for better cache reuse concurrency: group: release queue: max