Merged
Changes from all commits (192 commits)
c8e213d
CANN: Enable labeler for Ascend NPU (#13914)
shink Jun 9, 2025
bbd51ae
add geglu activation function (#14074)
huydt84 Jun 9, 2025
84aec15
sycl: Add reorder to Q6_K mmvq implementation (#13885)
s-Nick Jun 9, 2025
0363bd9
server : fix LRU check (#14079)
ggerganov Jun 9, 2025
cdaacee
webui: fix sidebar being covered by main content (#14082)
yeahdongcn Jun 9, 2025
9e1330c
CANN: Simplify the environment variable setting(#13104)
bachelor-dou Jun 9, 2025
652d610
graph : fix geglu (#14077)
ggerganov Jun 9, 2025
4eebf7c
cuda : fix device sync on buffer clear (#14033)
slaren Jun 9, 2025
55adae4
ggml-cpu : split arch-specific implementations (#13892)
xctan Jun 9, 2025
f707247
llama : allow building all tests on windows when not using shared lib…
slaren Jun 9, 2025
6341356
kv-cache : fix shift and defrag logic (#14081)
ggerganov Jun 9, 2025
9b90308
metal : use less stack memory in FA kernel (#14088)
ggerganov Jun 9, 2025
5d9c218
Add in-build ggml::ggml ALIAS library (ggml/1260)
dg0yt Jun 3, 2025
f6eca5c
sync : ggml
ggerganov Jun 10, 2025
e227eef
rpc : nicer error messages for RPC server crash (#14076)
isaac-mcfadyen Jun 10, 2025
2375744
Vulkan: Don't default to CPU device (like llvmpipe), even if no other…
0cc4m Jun 10, 2025
b4d1bcb
ggml : fix weak alias win32 (whisper/0)
ggerganov Jun 10, 2025
c13372c
sync : ggml
ggerganov Jun 10, 2025
50162e6
Fixed spec timings to: accepted/tested instead of accepted/drafted (#…
jukofyork Jun 10, 2025
11d3265
vulkan: force device 0 in CI (#14106)
jeffbolznv Jun 10, 2025
627669e
llama : support GEGLU for jina-bert-v2 (#14090)
CISC Jun 10, 2025
34641af
convert : fix duplicate key DeepSeek-R1 conversion error (#14103)
CISC Jun 10, 2025
cf34b12
kv-cache : avoid modifying recurrent cells when setting inputs (#13834)
compilade Jun 10, 2025
39d4a90
opencl: add `mul_mv_id_q4_0_f32_8x_flat` (#14003)
lhez Jun 10, 2025
e412729
vulkan: Track descriptor pools/sets per-context (#14109)
jeffbolznv Jun 11, 2025
038e0ef
kv-cache : add LLAMA_KV_CACHE_DEBUG environment variable (#14121)
ggerganov Jun 11, 2025
5043576
server : pass default --keep argument (#14120)
MightyAlex200 Jun 11, 2025
5fa57da
kv-cache : relax SWA masking condition (#14119)
ggerganov Jun 11, 2025
3539498
webui: Wrap long numbers instead of infinite horizontal scroll (#14062)
am17an Jun 11, 2025
11d112f
vulkan: Better thread-safety for command pools/buffers (#14116)
jeffbolznv Jun 11, 2025
b61ab1a
tests : add test-tokenizers-repo (#14017)
CISC Jun 11, 2025
7d9211b
chore : clean up relative source dir paths (#14128)
CISC Jun 11, 2025
caa52bc
Implement GGML_CPU_ALL_VARIANTS for ARM (#14080)
ckastner Jun 11, 2025
3472d6a
common: fix issue with regex_escape routine on windows (#14133)
bandoti Jun 11, 2025
bbe1d86
context : round n_tokens to next multiple of n_seqs when reserving (#…
compilade Jun 12, 2025
00c933f
kv-cache : fix split_equal handling in unified implementation (#14130)
ggerganov Jun 12, 2025
ae2e7f1
cmake : handle whitepsaces in path during metal build (#14126)
ggerganov Jun 12, 2025
e7cb2bb
batch : remove logits_all flag (#14141)
ggerganov Jun 12, 2025
96ee2f2
context : simplify output counting logic during decode (#14142)
ggerganov Jun 12, 2025
f7919ab
server : re-enable SWA speculative decoding (#14131)
ggerganov Jun 12, 2025
aa8c1ee
readme : remove project status link (#14149)
ggerganov Jun 12, 2025
ee60236
sycl: Remove not needed copy f16->f32 for dnnl mul mat (#14125)
ShanoToni Jun 12, 2025
8b8c7b5
vocab : prevent heap overflow when vocab is too small (#14145)
ggerganov Jun 13, 2025
4973338
cmake : Improve build-info.cpp generation (#14156)
ckastner Jun 13, 2025
bd245aa
SYCL: Bump oneMath commit (#14152)
Jun 13, 2025
cc04b82
sycl: Adding additional cpy dbg print output (#14034)
ShanoToni Jun 13, 2025
6e9863a
server : fix SWA condition for full context reprocess (#14163)
ggerganov Jun 13, 2025
11f5ff1
pooling : make cls_b and cls_out_b optional (#14165)
huydt84 Jun 13, 2025
8cdc20c
cmake: Add ability to pass in LLAMA_BUILD_NUMBER/COMMIT (#14167)
ckastner Jun 13, 2025
0e48242
readme : remove survey link (#14168)
ggerganov Jun 13, 2025
33e0a79
batch : rework llama_batch_allocr (#14153)
ggerganov Jun 13, 2025
8e7c3d1
docs : Update multimodal.md (#14122)
ddpasa Jun 13, 2025
b7c2305
batch : add LLAMA_BATCH_DEBUG environment variable (#14172)
ggerganov Jun 13, 2025
cdc7c96
Merge commit from fork
GuyGoldenberg Jun 13, 2025
d3b5f17
sycl: fix docker image (#14144)
sgeor255 Jun 13, 2025
1d03941
vocab : fix build (#14175)
ggerganov Jun 13, 2025
ea76fac
compare-llama-bench: add option to plot (#14169)
am17an Jun 14, 2025
dbf9c07
llama-chat : Do not throw when tool parsing fails (#14012)
p1-0tr Jun 14, 2025
05747f9
docs : remove WIP since PR has been merged (#13912)
pepijndevos Jun 15, 2025
8e95c37
batch : auto-gen positions + verify multi-sequence input (#14177)
ggerganov Jun 15, 2025
4d77526
cparams : rename LLAMA_MAX_PARALLEL_SEQUENCES to LLAMA_MAX_SEQ (#14188)
ggerganov Jun 15, 2025
313c61a
model : add dots.llm1 architecture support (#14044) (#14118)
Noeda Jun 15, 2025
595374e
kv-cache : fix use-after-move of defrag info (#14189)
ggerganov Jun 15, 2025
0e68fab
HIP: Replace usage of depricated preprocessor macro __AMDGCN_WAVEFRON…
IMbackK Jun 15, 2025
9fe2d8c
CUDA/HIP: fix ssm_scan on devices where warp size is not 32 (#14196)
IMbackK Jun 15, 2025
a3f2646
quantize : change int to unsigned int for KV overrides (#14197)
EAddario Jun 15, 2025
4a17628
server : When listening on a unix domain socket don't print http:// a…
ericcurtin Jun 15, 2025
d7d67ea
model : Add support for Arcee AI's upcoming AFM model (#14185)
bartowski1182 Jun 15, 2025
bd03b66
ggml-cpu : rework weak alias on apple targets (#14146)
xctan Jun 16, 2025
41efafc
vulkan: mutex around vkQueueSubmit (#14127)
jeffbolznv Jun 16, 2025
c72974f
gguf-py : allow key override when adding value to GGUFWriter (#14194)
huydt84 Jun 16, 2025
b4a4288
convert : remove arcee change in convert_hf_to_gguf_update.py (#14207)
bartowski1182 Jun 16, 2025
1c4aea3
ggml: Add Android support for GGML_CPU_ALL_VARIANTS (#14206)
chaxu01 Jun 16, 2025
6ce670e
llama : rework embeddings logic (#14208)
ggerganov Jun 16, 2025
0efd110
HIP: disable rocwmma on gfx12 by default until rocm 7.0 (#14202)
IMbackK Jun 16, 2025
aa25714
model : add NeoBERT (#14164)
huydt84 Jun 16, 2025
714684a
cmake: clean up external project logic for vulkan-shaders-gen (#14179)
bandoti Jun 16, 2025
ca3c490
llama : add thread safety test (#14035)
slaren Jun 16, 2025
44e4d3b
server : fix incorrect usage of llama_get_embeddings() (#14225)
ggerganov Jun 16, 2025
4323b94
common : suggest --jinja when autodetection fails (#14222)
CISC Jun 16, 2025
abc759f
musa: fix build warning (unused variable) (#14231)
yeahdongcn Jun 17, 2025
84c4938
ggml-cpu : remove the weak alias trick (#14221)
xctan Jun 17, 2025
0b4b271
cmake: remove shader-gen step-targets from ggml-vulkan (#14226)
bandoti Jun 17, 2025
d3a0914
examples : include examples in msvc disable warn (ggml/1270)
danbev Jun 12, 2025
17e1e35
ggml : remove unused ggml_context_container (ggml/1272)
danbev Jun 13, 2025
2d57d99
ggml : disable warnings for tests when using MSVC (ggml/1273)
danbev Jun 13, 2025
8023efa
sync : ggml
ggerganov Jun 18, 2025
d311034
convert : fix null head_dim AutoConfig regression (#14248)
CISC Jun 18, 2025
c8f423b
llama-chat : fix multiple system message for gemma, orion (#14246)
ngxson Jun 18, 2025
f4cee55
mtmd : refactor llava-uhd preprocessing logic (#14247)
ngxson Jun 18, 2025
51c1766
ggml: Add Apple support for GGML_CPU_ALL_VARIANTS (#14258)
chaxu01 Jun 18, 2025
708fcea
ggml-cpu: fix uncaught underscore terminators (#14023)
taronaeo Jun 18, 2025
13c556e
ggml-cpu: reduce asm calls for hsum (#14037)
taronaeo Jun 18, 2025
6e9c823
docs: add s390x build documentation (#14264)
taronaeo Jun 18, 2025
0db418d
metal : add mean kernel (#14267)
ggerganov Jun 19, 2025
0fc0feb
memory : Hybrid recurrent cache (#13979)
gabe-l-hart Jun 19, 2025
35fc17b
Vulkan: Set device max size for host memory to avoid OOM warning and …
0cc4m Jun 19, 2025
2fd2667
llamafile : support s390x SIMD instruction set (#14273)
taronaeo Jun 19, 2025
7577185
convert : fix remote option in Windows (#14100)
pqnet Jun 19, 2025
e106cfe
llama-bench : add --no-warmup flag (#14224) (#14270)
s2010 Jun 19, 2025
ace9d19
sycl: Cleanup codepaths in Get Rows in sycl backend (#14215)
ShanoToni Jun 19, 2025
bdd16d2
build : suppress gcc15 compile warnings (#14261)
fanyang89 Jun 19, 2025
28076dc
server : add server parameters for draft model cache type (#13782)
aa956 Jun 19, 2025
d54b24f
gguf-py : make sentencepiece optional (#14200)
Ahajha Jun 19, 2025
ab2de47
ggml-cpu : remove unnecesary arm feature detection (#14281)
slaren Jun 19, 2025
4a5248e
CUDA: add conv_2d_dw (#14265)
am17an Jun 20, 2025
a596c8f
ubatch : new splitting logic (#14217)
ggerganov Jun 20, 2025
f1e9fd2
model : more uniform output id handling (#14275)
ggerganov Jun 20, 2025
2a5a3e3
ggml: Update KleidiAI to v1.9.0 (#14277)
chaxu01 Jun 20, 2025
65bdc38
ggml : fix repack work size for mul_mat_id (#14292)
ggerganov Jun 20, 2025
85b2815
cuda : synchronize graph capture and cublas handle destruction (#14288)
slaren Jun 20, 2025
cc7ec0c
llama : improve sep token handling (#14272)
CISC Jun 20, 2025
c3c4e29
Implement GGML_CPU_ALL_VARIANTS for PowerPC (#14286)
ckastner Jun 20, 2025
9b2a774
sycl: add usage of enqueue_functions extension (#14244)
s-Nick Jun 20, 2025
4bf6bc5
vocab : prevent tokenizer overflow (#14301)
retr0reg Jun 20, 2025
e6d6a55
lint : remove trailing whitepace (#14304)
CISC Jun 20, 2025
7cc2c0b
CUDA: add conv_2d_transpose (#14287)
am17an Jun 20, 2025
cf7d3ea
docs : fix the link to llama.h (#14293)
david20571015 Jun 20, 2025
bbbf060
Add `ggml_roll` (ggml/1274)
Acly Jun 18, 2025
107ea62
sync : ggml
ggerganov Jun 20, 2025
ffdd7a0
convert : fix Llama 4 conversion (#14311)
danielhanchen Jun 21, 2025
3de2fe4
memory : rename interface to llama_memory_context_i (#14296)
ggerganov Jun 21, 2025
eb336c1
metal : fix thread-safety (#14300)
ggerganov Jun 21, 2025
6f3eff5
gguf-py : fix TemplateProcessing pair when bos/eos is missing (#14312)
CISC Jun 21, 2025
ecdc6f4
Add support for VK_EXT_debug_utils to add labels to Vulkan objects. (…
mtavenrath Jun 21, 2025
034639d
gguf-py : fix Qwen3-Embedding eos token (#14314)
CISC Jun 21, 2025
0f1694d
CUDA: add mean operation (#14313)
am17an Jun 22, 2025
72ab543
common : use std::string_view now that we target c++17 (#14319)
CISC Jun 22, 2025
d13978e
mtmd : fix Pixtral OOM with large images by capping image_size to 102…
yuiseki Jun 22, 2025
3e11cbb
HIP: enable vec fattn on RDNA4 (#14323)
IMbackK Jun 22, 2025
fa8fca6
examples : fix is_first logic for tokenization (#14329)
ggerganov Jun 22, 2025
6a45b53
run : avoid double tokenization (#14327)
retr0reg Jun 22, 2025
ade7b2d
gguf-py : fix SpecialVocab parsing when post_processor is null (#14330)
CISC Jun 22, 2025
113c422
quantize : handle user-defined pruning of whole layers (blocks) (#13037)
EAddario Jun 22, 2025
dffd806
vulkan: update windows SDK in CI (#14334)
jeffbolznv Jun 23, 2025
f5e61b9
kv-cells : fix tracking of seq_pos (#14339)
ggerganov Jun 23, 2025
6296acc
CUDA: mul_mat_v support for batch sizes > 1 (#14262)
JohannesGaessler Jun 23, 2025
7ab587e
llama : better rwkv chat template and add missing `inputs.use_jinja` …
MollySophia Jun 23, 2025
ca0943e
vulkan: update windows SDK in release.yml (#14344)
jeffbolznv Jun 23, 2025
d750892
ci: add workflow for relocatable cmake package (#14346)
bandoti Jun 23, 2025
ec74860
CUDA/HIP: optimize mmv paths taken for HIP devices (#14324)
IMbackK Jun 23, 2025
1f7b36a
jinja : Add Mistral-Small-3.2-24B-Instruct-2506.jinja (#14349)
bartowski1182 Jun 24, 2025
2e44f93
main : honor --verbose-prompt on interactive prompts (#14350)
CISC Jun 24, 2025
38a6de6
server : move no API key doc to /health (#14352)
pnb Jun 24, 2025
5e7ce24
cmake : use LLAMA_BUILD_NUMBER when defining LLAMA_INSTALL_VERSION (#…
mbaudier Jun 24, 2025
1e8ba97
batch : fix check for empty sequences in memory (#14364)
ggerganov Jun 24, 2025
50b288f
opencl: ref count `ggml_backend_opencl_context` and refactor profilin…
lhez Jun 24, 2025
873e973
sycl: GGML_SYCL_DISABLE_OPT on by default for all Intel Devices (#13973)
ShanoToni Jun 25, 2025
8b4e408
ggml : do not output unprintable characters on GGUF load failure (#14…
CISC Jun 25, 2025
43e4955
ggml-cpu: enable IBM NNPA Vector Intrinsics (#14317)
taronaeo Jun 25, 2025
41e8618
musa: enable fp16 mma (all) and cublas on qy2 (#13842)
yeahdongcn Jun 26, 2025
3132b6e
docs: update s390x documentation + add faq (#14389)
taronaeo Jun 26, 2025
22575ad
metal : batch rows copy in a single threadgroup (#14384)
ggerganov Jun 26, 2025
7b2938b
metal : add special-case mat-vec mul for ne00 == 4 (#14385)
ggerganov Jun 26, 2025
ef9d687
llama : return mistral-v7-tekken as default template only (#14390)
CISC Jun 26, 2025
20e6e61
cmake: regen vulkan shaders when shaders-gen sources change (#14398)
bandoti Jun 26, 2025
71e3887
model : gemma3n text-only (#14400)
ngxson Jun 26, 2025
1910178
convert : fix broken sentencepiece vocab (#14416)
CISC Jun 27, 2025
827dd5b
ggml : add ggml_set_rows (#14274)
rgerganov Jun 27, 2025
6d1bb39
recurrent : call balloc split_reset() in init_batch() (#14414)
ggerganov Jun 27, 2025
d668167
graph : make llm_graph_context destructor virtual (#14410)
ggerganov Jun 27, 2025
deae1bc
vulkan: Fix GGML_VULKAN_SHADER_DEBUG_INFO (#14427)
jeffbolznv Jun 28, 2025
a490434
ci : fix windows build and release (#14431)
CISC Jun 28, 2025
567ae3e
fix async_mode bug (#14432)
bachelor-dou Jun 28, 2025
4aae9bc
model : add support for ERNIE 4.5 0.3B model (#14408)
ownia Jun 28, 2025
197286a
vulkan: lock accesses of pinned_memory vector (#14333)
jeffbolznv Jun 28, 2025
20dc224
vulkan: handle noncontig in the final case of ggml_vk_get_cpy_pipelin…
jeffbolznv Jun 28, 2025
6578557
CUDA: add bf16 and f32 support to cublas_mul_mat_batched (#14361)
am17an Jun 28, 2025
e5fa50b
vulkan: Add fusion support for RMS_NORM+MUL (#14366)
jeffbolznv Jun 29, 2025
16adbe1
ggml : implement REGLU/GEGLU/SWIGLU ops (#14158)
CISC Jun 29, 2025
0f5b1fd
ggml : fix unmerged GGML_FPxx_TO_FPxx refactoring (#14443)
CISC Jun 29, 2025
54caf5d
SYCL: disable faulty fp16 exp kernel (#14395)
qnixsynapse Jun 29, 2025
71c0d60
server : fix appearance of the chats list context menu for Safari (#1…
rntk Jun 29, 2025
630a82c
server : support jinja extra template kwargs (Qwen3 enable_thinking f…
matteoserva Jun 29, 2025
0d0ef3e
scripts : make the shell scripts cross-platform (#14341)
vedranmiletic Jun 30, 2025
0fb9ebb
cmake : Remove redundant include path in CMakeLists.txt (#14452)
xiaobing318 Jun 30, 2025
52d0667
test-backend-ops : disable llama test (#14461)
slaren Jun 30, 2025
195134c
ggml-cpu: sycl: Re-enable exp f16 (#14462)
Rbiessy Jun 30, 2025
89e5342
metal : disable fast-math for some cpy kernels (#14460)
ggerganov Jun 30, 2025
3e39a42
memory : correctly handle failure in apply() (#14438)
ggerganov Jun 30, 2025
a6b9824
Add Conv2d for CPU (#14388)
am17an Jun 30, 2025
780ba6d
opencl : add GEGLU, REGLU, SWIGLU (#14456)
lhez Jul 1, 2025
af33c35
ggml-quants : rename best_mad to best_error (ggml/1283)
danbev Jun 24, 2025
8b48266
ggml-cpu : "align corners" for bilinear upscale/downscale (ggml/1285)
Acly Jul 1, 2025
f7ca5cc
sync : ggml
ggerganov Jul 1, 2025
afe880b
ggml : remove trailing whitespace (#0)
ggerganov Jul 1, 2025
edd05a2
add GELU_ERF (#14455)
CISC Jul 1, 2025
80b5906
vulkan: Split large mul_mat_id to fit in shared memory (#14451)
jeffbolznv Jul 1, 2025
41697f3
CANN: update aclnnGroupedMatmulV2 to aclnnGroupedMatmulV3 (#14411)
noemotiovon Jul 1, 2025
530c9a9
Add Vulkan images to docker.md (#14472)
xek Jul 1, 2025
244305f
ci : disable fast-math for Metal GHA CI (#14478)
ggerganov Jul 1, 2025
6399ac4
ggml : Callback before abort (#14481)
ScaledLizard Jul 2, 2025
30 changes: 17 additions & 13 deletions .devops/intel.Dockerfile
@@ -49,19 +49,23 @@ COPY --from=build /app/full /app

WORKDIR /app

-RUN apt-get update \
-    && apt-get install -y \
-    git \
-    python3 \
-    python3-pip \
-    && pip install --upgrade pip setuptools wheel \
-    && pip install -r requirements.txt \
-    && apt autoremove -y \
-    && apt clean -y \
-    && rm -rf /tmp/* /var/tmp/* \
-    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
-    && find /var/cache -type f -delete
-
+RUN apt-get update && \
+    apt-get install -y \
+    git \
+    python3 \
+    python3-pip \
+    python3-venv && \
+    python3 -m venv /opt/venv && \
+    . /opt/venv/bin/activate && \
+    pip install --upgrade pip setuptools wheel && \
+    pip install -r requirements.txt && \
+    apt autoremove -y && \
+    apt clean -y && \
+    rm -rf /tmp/* /var/tmp/* && \
+    find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
+    find /var/cache -type f -delete
+
+ENV PATH="/opt/venv/bin:$PATH"

ENTRYPOINT ["/app/tools.sh"]
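
The rewritten RUN block installs the Python requirements into a virtual environment at /opt/venv and exports it on PATH, so pip no longer writes into the system interpreter (which newer Debian/Ubuntu base images reject as an "externally managed environment"). A minimal smoke test of the resulting image, as a sketch — the llamacpp-intel-full tag is illustrative and the full stage name is assumed from this Dockerfile's layout, neither is defined by the PR:

    # Build the image, then confirm python3/pip resolve inside /opt/venv.
    docker build -f .devops/intel.Dockerfile --target full -t llamacpp-intel-full .
    docker run --rm --entrypoint sh llamacpp-intel-full \
        -c 'command -v pip && python3 -c "import sys; print(sys.prefix)"'
    # Expected: both printed paths begin with /opt/venv.
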
2 changes: 1 addition & 1 deletion .devops/tools.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e

# Read the first argument into a variable
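
Switching the shebang to env resolves bash through PATH instead of hard-coding /bin/bash, which matters on systems where bash lives elsewhere (e.g. NixOS or the BSDs). A quick sanity check, as a sketch:

    command -v bash               # where env(1) will find the interpreter
    head -n 1 .devops/tools.sh    # now prints: #!/usr/bin/env bash
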
7 changes: 7 additions & 0 deletions .github/labeler.yml
@@ -86,3 +86,10 @@ nix:
embedding:
  - changed-files:
      - any-glob-to-any-file: examples/embedding/
+
+Ascend NPU:
+  - changed-files:
+      - any-glob-to-any-file:
+          - ggml/include/ggml-cann.h
+          - ggml/src/ggml-cann/**
+          - docs/backend/CANN.md
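
The new entry only takes effect when the repository's labeler workflow (actions/labeler) evaluates this config against a PR's changed files. A rough local approximation of the new globs, assuming master is the base branch of your checkout:

    # List changed files that would match the "Ascend NPU" patterns above.
    git diff --name-only master...HEAD \
        | grep -E '^(ggml/include/ggml-cann\.h$|ggml/src/ggml-cann/|docs/backend/CANN\.md$)' \
        && echo "PR would get label: Ascend NPU"
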
51 changes: 51 additions & 0 deletions .github/workflows/build-cmake-pkg.yml
@@ -0,0 +1,51 @@
name: Build relocatable cmake package
on:
  workflow_dispatch:
  workflow_call:

jobs:
  linux:
    runs-on: ubuntu-24.04
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Install dependencies
        run: |
          sudo apt update
          sudo apt install -y build-essential tcl

      - name: Build
        run: |
          PREFIX="$(pwd)"/inst
          cmake -S . -B build -DCMAKE_PREFIX_PATH="$PREFIX" \
              -DLLAMA_CURL=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=OFF \
              -DLLAMA_BUILD_EXAMPLES=OFF -DCMAKE_BUILD_TYPE=Release
          cmake --build build --config Release
          cmake --install build --prefix "$PREFIX" --config Release

          export LLAMA_CONFIG="$PREFIX"/lib/cmake/llama/llama-config.cmake
          tclsh <<'EOF'
          set build(commit) [string trim [exec git rev-parse --short HEAD]]
          set build(number) [string trim [exec git rev-list --count HEAD]]
          set build(version) "0.0.$build(number)"

          set llamaconfig [read [open "$env(LLAMA_CONFIG)" r]]
          set checks [list "set\\(LLAMA_VERSION \\s+$build(version)\\)" \
                           "set\\(LLAMA_BUILD_COMMIT\\s+$build(commit)\\)" \
                           "set\\(LLAMA_BUILD_NUMBER\\s+$build(number)\\)"]

          puts -nonewline "Checking llama-config.cmake version... "
          foreach check $checks {
              if {![regexp -expanded -- $check $llamaconfig]} {
                  puts "\"$check\" failed!"
                  exit 1
              }
          }
          puts "success."
          EOF

          cd examples/simple-cmake-pkg
          cmake -S . -B build -DCMAKE_PREFIX_PATH="$PREFIX"/lib/cmake
          cmake --build build
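
In effect the job installs llama.cpp into a throwaway prefix, uses the Tcl here-doc to assert that the installed llama-config.cmake carries the expected LLAMA_VERSION / LLAMA_BUILD_COMMIT / LLAMA_BUILD_NUMBER values, then builds examples/simple-cmake-pkg against that prefix to prove the package is consumable out of tree. A local sketch of the same flow, with a plain grep standing in for the Tcl regex checks:

    # Install into ./inst and inspect the relocatable package (run from the repo root).
    PREFIX="$(pwd)/inst"
    cmake -S . -B build -DCMAKE_PREFIX_PATH="$PREFIX" \
        -DLLAMA_CURL=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=OFF \
        -DLLAMA_BUILD_EXAMPLES=OFF -DCMAKE_BUILD_TYPE=Release
    cmake --build build --config Release
    cmake --install build --prefix "$PREFIX" --config Release
    grep -E 'set\(LLAMA_(VERSION|BUILD_COMMIT|BUILD_NUMBER)' \
        "$PREFIX/lib/cmake/llama/llama-config.cmake"
    # Consume the installed package out of tree (pkg-build is an arbitrary dir name).
    cmake -S examples/simple-cmake-pkg -B pkg-build -DCMAKE_PREFIX_PATH="$PREFIX/lib/cmake"
    cmake --build pkg-build
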
73 changes: 60 additions & 13 deletions .github/workflows/build.yml
@@ -5,10 +5,43 @@ on:
  push:
    branches:
      - master
-    paths: ['.github/workflows/build.yml', '.github/workflows/build-linux-cross.yml', '**/CMakeLists.txt', '**/.cmake', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']
+    paths: [
+      '.github/workflows/build.yml',
+      '.github/workflows/build-linux-cross.yml',
+      '.github/workflows/build-cmake-pkg.yml',
+      '**/CMakeLists.txt',
+      '**/.cmake',
+      '**/*.h',
+      '**/*.hpp',
+      '**/*.c',
+      '**/*.cpp',
+      '**/*.cu',
+      '**/*.cuh',
+      '**/*.swift',
+      '**/*.m',
+      '**/*.metal',
+      '**/*.comp'
+    ]

  pull_request:
    types: [opened, synchronize, reopened]
-    paths: ['.github/workflows/build.yml', '.github/workflows/build-linux-cross.yml', '**/CMakeLists.txt', '**/.cmake', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']
+    paths: [
+      '.github/workflows/build.yml',
+      '.github/workflows/build-linux-cross.yml',
+      '.github/workflows/build-cmake-pkg.yml',
+      '**/CMakeLists.txt',
+      '**/.cmake',
+      '**/*.h',
+      '**/*.hpp',
+      '**/*.c',
+      '**/*.cpp',
+      '**/*.cu',
+      '**/*.cuh',
+      '**/*.swift',
+      '**/*.m',
+      '**/*.metal',
+      '**/*.comp'
+    ]

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
@@ -51,7 +84,8 @@ jobs:
            -DCMAKE_BUILD_RPATH="@loader_path" \
            -DLLAMA_FATAL_WARNINGS=ON \
            -DGGML_METAL_USE_BF16=ON \
-            -DGGML_METAL_EMBED_LIBRARY=ON \
+            -DGGML_METAL_EMBED_LIBRARY=OFF \
+            -DGGML_METAL_SHADER_DEBUG=ON \
            -DGGML_RPC=ON
          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

@@ -306,6 +340,7 @@ jobs:
        id: cmake_test
        run: |
          cd build
+          export GGML_VK_VISIBLE_DEVICES=0
          # This is using llvmpipe and runs slower than other backends
          ctest -L main --verbose --timeout 3600

@@ -477,6 +512,9 @@ jobs:
  build-linux-cross:
    uses: ./.github/workflows/build-linux-cross.yml

+  build-cmake-pkg:
+    uses: ./.github/workflows/build-cmake-pkg.yml
+
  macOS-latest-cmake-ios:
    runs-on: macos-latest

@@ -627,7 +665,7 @@ jobs:
          ./build-xcframework.sh

  windows-msys2:
-    runs-on: windows-latest
+    runs-on: windows-2025

    strategy:
      fail-fast: false
@@ -677,27 +715,33 @@ jobs:
          cmake --build build --config ${{ matrix.build }} -j $(nproc)

  windows-latest-cmake:
-    runs-on: windows-latest
+    runs-on: windows-2025

    env:
      OPENBLAS_VERSION: 0.3.23
      SDE_VERSION: 9.33.0-2024-01-07
-      VULKAN_VERSION: 1.4.309.0
+      VULKAN_VERSION: 1.4.313.2

    strategy:
      matrix:
        include:
-          - build: 'cpu-x64'
-            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF'
+          - build: 'cpu-x64 (static)'
+            arch: 'x64'
+            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF'
          - build: 'openblas-x64'
+            arch: 'x64'
            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
          - build: 'vulkan-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_VULKAN=ON'
+            arch: 'x64'
+            defines: '-DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_VULKAN=ON'
          - build: 'llvm-arm64'
+            arch: 'arm64'
            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
          - build: 'llvm-arm64-opencl-adreno'
+            arch: 'arm64'
            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'
          # - build: 'kompute-x64'
+          #   arch: 'x64'
          #   defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON'

    steps:
@@ -735,7 +779,7 @@
        id: get_vulkan
        if: ${{ matrix.build == 'kompute-x64' || matrix.build == 'vulkan-x64' }}
        run: |
-          curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
+          curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/vulkansdk-windows-X64-${env:VULKAN_VERSION}.exe"
          & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
          Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
          Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"
@@ -768,6 +812,8 @@
      - name: libCURL
        id: get_libcurl
        uses: ./.github/actions/windows-setup-curl
+        with:
+          architecture: ${{ matrix.arch == 'x64' && 'win64' || 'win64a' }}

      - name: Build
        id: cmake_build
@@ -777,6 +823,7 @@
          cmake -S . -B build ${{ matrix.defines }} `
            -DCURL_LIBRARY="$env:CURL_PATH/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:CURL_PATH/include"
          cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS}
+          cp $env:CURL_PATH/bin/libcurl-*.dll build/bin/Release

      - name: Add libopenblas.dll
        id: add_libopenblas_dll
@@ -787,7 +834,7 @@

      - name: Test
        id: cmake_test
-        if: ${{ matrix.build != 'llvm-arm64' && matrix.build != 'llvm-arm64-opencl-adreno' }}
+        if: ${{ matrix.arch == 'x64' }}
        run: |
          cd build
          ctest -L main -C Release --verbose --timeout 900
@@ -892,7 +939,7 @@
          cmake --build build --config Release

  windows-latest-cmake-sycl:
-    runs-on: windows-latest
+    runs-on: windows-2022

    defaults:
      run:
@@ -926,7 +973,7 @@

  windows-latest-cmake-hip:
    if: ${{ github.event.inputs.create_release != 'true' }}
-    runs-on: windows-latest
+    runs-on: windows-2022

    steps:
      - name: Clone
16 changes: 8 additions & 8 deletions .github/workflows/release.yml
@@ -235,7 +235,7 @@ jobs:
          name: llama-bin-ubuntu-vulkan-x64.zip

  windows-cpu:
-    runs-on: windows-latest
+    runs-on: windows-2025

    strategy:
      matrix:
@@ -271,7 +271,7 @@
        env:
          CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
        run: |
-          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch }}
+          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'x64' && 'x64' || 'amd64_arm64' }}
          cmake -S . -B build -G "Ninja Multi-Config" ^
            -D CMAKE_TOOLCHAIN_FILE=cmake/${{ matrix.arch }}-windows-llvm.cmake ^
            -DGGML_NATIVE=OFF ^
@@ -288,7 +288,7 @@
          CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
        run: |
          Copy-Item $env:CURL_PATH\bin\libcurl-${{ matrix.arch }}.dll .\build\bin\Release\
-          Copy-Item "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Redist\MSVC\14.42.34433\debug_nonredist\${{ matrix.arch }}\Microsoft.VC143.OpenMP.LLVM\libomp140.${{ matrix.arch == 'x64' && 'x86_64' || 'aarch64' }}.dll" .\build\bin\Release\
+          Copy-Item "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Redist\MSVC\14.44.35112\debug_nonredist\${{ matrix.arch }}\Microsoft.VC143.OpenMP.LLVM\libomp140.${{ matrix.arch == 'x64' && 'x86_64' || 'aarch64' }}.dll" .\build\bin\Release\
          7z a llama-bin-win-cpu-${{ matrix.arch }}.zip .\build\bin\Release\*

      - name: Upload artifacts
@@ -298,11 +298,11 @@
          name: llama-bin-win-cpu-${{ matrix.arch }}.zip

  windows:
-    runs-on: windows-latest
+    runs-on: windows-2025

    env:
      OPENBLAS_VERSION: 0.3.23
-      VULKAN_VERSION: 1.4.309.0
+      VULKAN_VERSION: 1.4.313.2

    strategy:
      matrix:
@@ -332,7 +332,7 @@
        id: get_vulkan
        if: ${{ matrix.backend == 'vulkan' }}
        run: |
-          curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
+          curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/vulkansdk-windows-X64-${env:VULKAN_VERSION}.exe"
          & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
          Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
          Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"
@@ -448,7 +448,7 @@
          name: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip

  windows-sycl:
-    runs-on: windows-latest
+    runs-on: windows-2022

    defaults:
      run:
@@ -520,7 +520,7 @@
          name: llama-bin-win-sycl-x64.zip

  windows-hip:
-    runs-on: windows-latest
+    runs-on: windows-2022

    strategy:
      matrix:
14 changes: 10 additions & 4 deletions CMakeLists.txt
@@ -89,6 +89,14 @@ option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/common.cmake)

+if (NOT DEFINED LLAMA_BUILD_NUMBER)
+    set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER})
+endif()
+if (NOT DEFINED LLAMA_BUILD_COMMIT)
+    set(LLAMA_BUILD_COMMIT ${BUILD_COMMIT})
+endif()
+set(LLAMA_INSTALL_VERSION 0.0.${LLAMA_BUILD_NUMBER})
+
# override ggml options
set(GGML_ALL_WARNINGS ${LLAMA_ALL_WARNINGS})
set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS})
@@ -155,6 +163,8 @@ if (LLAMA_USE_SYSTEM_GGML)
endif()

if (NOT TARGET ggml AND NOT LLAMA_USE_SYSTEM_GGML)
+    set(GGML_BUILD_NUMBER ${LLAMA_BUILD_NUMBER})
+    set(GGML_BUILD_COMMIT ${LLAMA_BUILD_COMMIT})
    add_subdirectory(ggml)
    # ... otherwise assume ggml is added by a parent CMakeLists.txt
endif()
@@ -204,10 +214,6 @@ endif()
include(GNUInstallDirs)
include(CMakePackageConfigHelpers)

-set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER})
-set(LLAMA_BUILD_COMMIT ${BUILD_COMMIT})
-set(LLAMA_INSTALL_VERSION 0.0.${BUILD_NUMBER})
-
set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
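
With the NOT DEFINED guards above, the build metadata becomes overridable at configure time instead of being derived only from the git checkout — useful for distro or tarball builds where .git is absent. A sketch of the override, using placeholder values:

    # LLAMA_INSTALL_VERSION becomes 0.0.1234; both values are also
    # forwarded to ggml via GGML_BUILD_NUMBER/GGML_BUILD_COMMIT.
    cmake -S . -B build \
        -DLLAMA_BUILD_NUMBER=1234 \
        -DLLAMA_BUILD_COMMIT=c8e213d \
        -DCMAKE_BUILD_TYPE=Release
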