Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 0 additions & 34 deletions .github/workflows/build-apple.yml
Original file line number Diff line number Diff line change
Expand Up @@ -109,40 +109,6 @@ jobs:
cd build
ctest -L main --verbose --timeout 900

macos-latest-ios:
runs-on: macos-latest

steps:
- name: Clone
id: checkout
uses: actions/checkout@v6

# TODO: this likely does not do anything - if yes, remove it
- name: ccache
uses: ggml-org/ccache-action@v1.2.21
with:
key: apple-ios
evict-old-files: 1d
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}

- name: Build
id: cmake_build
run: |
sysctl -a
cmake -B build -G Xcode \
-DGGML_METAL_USE_BF16=ON \
-DGGML_METAL_EMBED_LIBRARY=ON \
-DLLAMA_BUILD_APP=OFF \
-DLLAMA_BUILD_COMMON=OFF \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TOOLS=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_BUILD_SERVER=OFF \
-DCMAKE_SYSTEM_NAME=iOS \
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO

macos-latest-ios-xcode:
runs-on: macos-latest

Expand Down
48 changes: 12 additions & 36 deletions .github/workflows/build-openvino.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,24 +35,12 @@ env:

jobs:
ubuntu-24-openvino:
name: ubuntu-24-openvino-${{ matrix.openvino_device }}
runs-on: [self-hosted, Linux, Intel, OpenVINO]

concurrency:
group: openvino-${{ matrix.variant }}-${{ github.head_ref || github.ref }}
group: openvino-gpu-${{ github.head_ref || github.ref }}
cancel-in-progress: false

strategy:
matrix:
include:
- variant: cpu
runner: '"ubuntu-24.04"'
openvino_device: "CPU"
- variant: gpu
runner: '["self-hosted","Linux","Intel","OpenVINO"]'
openvino_device: "GPU"

runs-on: ${{ fromJSON(matrix.runner) }}

env:
# Sync versions in build-openvino.yml, build-self-hosted.yml, release.yml, build-cache.yml, .devops/openvino.Dockerfile
OPENVINO_VERSION_MAJOR: "2026.0"
Expand All @@ -63,31 +51,14 @@ jobs:
id: checkout
uses: actions/checkout@v6

- name: ccache
if: runner.environment == 'github-hosted'
uses: ggml-org/ccache-action@v1.2.21
with:
key: openvino-ubuntu-24.04-${{ matrix.variant }}-no-preset-v1
evict-old-files: 1d
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}

- name: Dependencies
id: depends
run: |
sudo apt-get update
sudo apt-get install -y build-essential libssl-dev libtbb12 cmake ninja-build python3-pip
sudo apt-get install -y ocl-icd-opencl-dev opencl-headers opencl-clhpp-headers intel-opencl-icd

- name: Use OpenVINO Toolkit Cache
if: runner.environment == 'github-hosted'
uses: actions/cache@v5
id: cache-openvino
with:
path: ./openvino_toolkit
key: cache-gha-openvino-toolkit-v${{ env.OPENVINO_VERSION_FULL }}-${{ runner.os }}

- name: Setup OpenVINO Toolkit
if: steps.cache-openvino.outputs.cache-hit != 'true'
uses: ./.github/actions/linux-setup-openvino
with:
path: ./openvino_toolkit
Expand All @@ -109,12 +80,17 @@ jobs:
-DGGML_OPENVINO=ON
time cmake --build build/ReleaseOV --config Release -j $(nproc)

- name: Test
id: cmake_test
- name: Test (CPU)
id: cmake_test_cpu
# TODO: fix and re-enable the `test-llama-archs` test below
run: |
cd ${{ github.workspace }}
ctest --test-dir build/ReleaseOV -L main -E "test-llama-archs" --verbose --timeout 2000

- name: Test (GPU)
id: cmake_test_gpu
# TODO: fix and re-enable the `test-llama-archs` test below
run: |
cd ${{ github.workspace }}
if [ "${{ matrix.openvino_device }}" = "GPU" ]; then
export GGML_OPENVINO_DEVICE=GPU
fi
export GGML_OPENVINO_DEVICE=GPU
ctest --test-dir build/ReleaseOV -L main -E "test-llama-archs" --verbose --timeout 2000
4 changes: 2 additions & 2 deletions .github/workflows/build-rpc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ env:
LLAMA_ARG_LOG_TIMESTAMPS: 1

jobs:
ubuntu-latest-rpc:
runs-on: ubuntu-latest
ubuntu-24-rpc:
runs-on: ${{ 'ubuntu-24.04-arm' || 'ubuntu-24.04' }}

continue-on-error: true

Expand Down
27 changes: 7 additions & 20 deletions .github/workflows/build-self-hosted.yml
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ jobs:
GG_BUILD_WEBGPU=1 GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \
bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

gpu-vulkan:
gpu-vulkan-apple:
runs-on: [self-hosted, macOS, ARM64]

steps:
Expand Down Expand Up @@ -261,7 +261,7 @@ jobs:
# a valid python environment for testing
LLAMA_FATAL_WARNINGS=OFF GG_BUILD_NINJA=1 GG_BUILD_VULKAN=1 GG_BUILD_LOW_PERF=1 ./ci/run.sh ./results/llama.cpp ./mnt/llama.cpp

cpu-openvino-low-perf:
gpu-openvino-low-perf:
runs-on: [self-hosted, Linux, Intel, OpenVINO]

concurrency:
Expand Down Expand Up @@ -297,8 +297,8 @@ jobs:
source ./openvino_toolkit/setupvars.sh
GG_BUILD_OPENVINO=1 GGML_OPENVINO_DEVICE=GPU GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

cpu-any-low-perf:
runs-on: [self-hosted, CPU]
cpu-x64-high-perf:
runs-on: [self-hosted, X64]

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


steps:
- name: Clone
Expand All @@ -308,22 +308,9 @@ jobs:
- name: Test
id: ggml-ci
run: |
LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
LLAMA_ARG_THREADS=$(nproc) GG_BUILD_HIGH_PERF=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

cpu-any-high-perf:
runs-on: [self-hosted, CPU]

steps:
- name: Clone
id: checkout
uses: actions/checkout@v6

- name: Test
id: ggml-ci
run: |
LLAMA_ARG_THREADS=$(nproc) GG_BUILD_HIGH_PERF=1 GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

cpu-arm64-graviton4:
cpu-arm64-high-perf-graviton4:
runs-on: ah-ubuntu_22_04-c8g_8x

steps:
Expand Down Expand Up @@ -360,7 +347,7 @@ jobs:
- name: Test
id: ggml-ci
run: |
LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
LLAMA_ARG_THREADS=$(nproc) GG_BUILD_HIGH_PERF=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

cpu-arm64-graviton4-kleidiai:
runs-on: ah-ubuntu_22_04-c8g_8x
Expand Down
14 changes: 3 additions & 11 deletions .github/workflows/build-vulkan.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,16 +36,8 @@ env:
LLAMA_ARG_LOG_TIMESTAMPS: 1

jobs:
ubuntu:
strategy:
matrix:
include:
- build: 'x64'
os: ubuntu-24.04
- build: 'arm64'
os: ubuntu-24.04-arm

runs-on: ${{ matrix.os }}
ubuntu-arm64:
runs-on: ubuntu-24.04-arm

steps:
- name: Clone
Expand All @@ -63,7 +55,7 @@ jobs:
- name: ccache
uses: ggml-org/ccache-action@v1.2.21
with:
key: vulkan-${{ matrix.os }}-new
key: vulkan-ubuntu-24.04-arm-new
variant: ccache
evict-old-files: 1d
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
Expand Down
12 changes: 2 additions & 10 deletions .github/workflows/build-webgpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -130,15 +130,7 @@ jobs:
ctest -L main -E test-backend-ops --verbose --timeout 900

ubuntu-wasm:
strategy:
matrix:
include:
- build: 'x64'
os: ubuntu-24.04
- build: 'arm64'
os: ubuntu-24.04-arm

runs-on: ${{ matrix.os }}
runs-on: ubuntu-24.04-arm

steps:
- name: Clone
Expand All @@ -148,7 +140,7 @@ jobs:
- name: ccache
uses: ggml-org/ccache-action@v1.2.21
with:
key: webgpu-${{ matrix.os }}-wasm
key: webgpu-ubuntu-24.04-arm-wasm
evict-old-files: 1d
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}

Expand Down
46 changes: 23 additions & 23 deletions .github/workflows/server.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,21 +55,7 @@ concurrency:

jobs:
ubuntu:
runs-on: ubuntu-24.04

name: ubuntu (${{ matrix.wf_name }})
strategy:
matrix:
build_type: [Release]
wf_name: ["default"]
include:
- build_type: Release
extra_args: ""
wf_name: "default"
- build_type: Release
extra_args: "LLAMA_ARG_BACKEND_SAMPLING=1"
wf_name: "backend-sampling"
fail-fast: false
runs-on: ubuntu-24.04-arm

steps:
- name: Dependencies
Expand All @@ -96,7 +82,7 @@ jobs:
- name: ccache
uses: ggml-org/ccache-action@v1.2.21
with:
key: server-ubuntu-24.04-x64
key: server-ubuntu-24.04-arm
evict-old-files: 1d
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}

Expand All @@ -105,7 +91,7 @@ jobs:
run: |
cmake -B build \
-DGGML_SCHED_NO_REALLOC=ON
cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
cmake --build build --config Release -j $(nproc) --target llama-server

- name: Python setup
id: setup_python
Expand All @@ -116,18 +102,32 @@ jobs:

- name: Tests
id: server_integration_tests
if: ${{ (!matrix.disabled_on_pr || !github.event.pull_request) }}
if: ${{ !github.event.pull_request }}
run: |
cd tools/server/tests
export ${{ matrix.extra_args }}
pytest -v -x -m "not slow"

- name: Slow tests
id: server_integration_tests_slow
if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
if: ${{ github.event.schedule || github.event.inputs.slow_tests == 'true' }}
run: |
cd tools/server/tests
export ${{ matrix.extra_args }}
SLOW_TESTS=1 pytest -v -x

- name: Tests (Backend sampling)
id: server_integration_tests_backend_sampling
if: ${{ !github.event.pull_request }}
run: |
cd tools/server/tests
export LLAMA_ARG_BACKEND_SAMPLING=1
pytest -v -x -m "not slow"

- name: Slow tests (Backend sampling)
id: server_integration_tests_slow_backend_sampling
if: ${{ github.event.schedule || github.event.inputs.slow_tests == 'true' }}
run: |
cd tools/server/tests
export LLAMA_ARG_BACKEND_SAMPLING=1
SLOW_TESTS=1 pytest -v -x

windows:
Expand Down Expand Up @@ -169,15 +169,15 @@ jobs:

- name: Tests
id: server_integration_tests
if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
if: ${{ !github.event.pull_request }}
run: |
cd tools/server/tests
$env:PYTHONIOENCODING = ":replace"
pytest -v -x -m "not slow"

- name: Slow tests
id: server_integration_tests_slow
if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
if: ${{ github.event.schedule || github.event.inputs.slow_tests == 'true' }}
run: |
cd tools/server/tests
$env:SLOW_TESTS = "1"
Expand Down
Loading