Skip to content

Commit d78508d

Browse files
committed
patch: metal : avoid using Metal's gpuAddress property
See: ggml-org/llama.cpp#16576

Signed-off-by: Julien Jerphanion <[email protected]>
1 parent 3b018bd commit d78508d

File tree

3 files changed

+182
-27
lines changed

3 files changed

+182
-27
lines changed

recipe/0001-Change-gpuAddress-for-contents.patch

Lines changed: 0 additions & 26 deletions
This file was deleted.

recipe/16576.patch

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
From a8d57d66096a65019e2354ff9efe23688794f72e Mon Sep 17 00:00:00 2001
2+
From: Georgi Gerganov <[email protected]>
3+
Date: Tue, 14 Oct 2025 14:11:18 +0300
4+
Subject: [PATCH 1/2] metal : avoid using Metal's gpuAddress property
5+
6+
---
7+
ggml/src/ggml-metal/ggml-metal-device.m | 24 ++++++++++++++----------
8+
1 file changed, 14 insertions(+), 10 deletions(-)
9+
10+
diff --git a/ggml/src/ggml-metal/ggml-metal-device.m b/ggml/src/ggml-metal/ggml-metal-device.m
11+
index c3fe8f4e91002..553cf8f5f39ac 100644
12+
--- a/ggml/src/ggml-metal/ggml-metal-device.m
13+
+++ b/ggml/src/ggml-metal/ggml-metal-device.m
14+
@@ -7,6 +7,8 @@
15+
16+
#include <Metal/Metal.h>
17+
18+
+#include <stdatomic.h>
19+
+
20+
#ifndef TARGET_OS_VISION
21+
#define TARGET_OS_VISION 0
22+
#endif
23+
@@ -22,6 +24,9 @@
24+
// overload of MTLGPUFamilyMetal3 (not available in some environments)
25+
static const NSInteger MTLGPUFamilyMetal3_GGML = 5001;
26+
27+
+// virtual address for GPU memory allocations
28+
+static atomic_uintptr_t g_addr_device = 0x000000400ULL;
29+
+
30+
#if !GGML_METAL_EMBED_LIBRARY
31+
// Here to assist with NSBundle Path Hack
32+
@interface GGMLMetalClass : NSObject
33+
@@ -827,7 +832,7 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te
34+
};
35+
36+
struct ggml_metal_buffer {
37+
- void * all_data; // TODO: https://github.com/ggml-org/llama.cpp/pull/15985
38+
+ void * all_data;
39+
size_t all_size;
40+
41+
// if false, the Metal buffer data is allocated in private GPU memory and is not shared with the host
42+
@@ -965,14 +970,15 @@ ggml_metal_buffer_t ggml_metal_buffer_init(ggml_metal_device_t dev, size_t size,
43+
if (shared) {
44+
res->all_data = ggml_metal_host_malloc(size_aligned);
45+
res->is_shared = true;
46+
- res->owned = true;
47+
} else {
48+
- // dummy, non-NULL value - we'll populate this after creating the Metal buffer below
49+
- res->all_data = (void *) 0x000000400ULL;
50+
+ // use virtual address from g_addr_device counter
51+
+ res->all_data = (void *) atomic_fetch_add_explicit(&g_addr_device, size_aligned, memory_order_relaxed);
52+
res->is_shared = false;
53+
}
54+
res->all_size = size_aligned;
55+
56+
+ res->owned = true;
57+
+
58+
res->device = ggml_metal_device_get_obj(dev);
59+
res->queue = ggml_metal_device_get_queue(dev);
60+
61+
@@ -983,15 +989,13 @@ ggml_metal_buffer_t ggml_metal_buffer_init(ggml_metal_device_t dev, size_t size,
62+
res->buffers[0].metal = nil;
63+
64+
if (size_aligned > 0) {
65+
- if (props_dev->use_shared_buffers &&shared) {
66+
+ if (props_dev->use_shared_buffers && shared) {
67+
res->buffers[0].metal = [res->device newBufferWithBytesNoCopy:res->all_data
68+
length:size_aligned
69+
options:MTLResourceStorageModeShared
70+
deallocator:nil];
71+
} else {
72+
res->buffers[0].metal = [res->device newBufferWithLength:size_aligned options:MTLResourceStorageModePrivate];
73+
-
74+
- res->all_data = (void *) (res->buffers[0].metal.gpuAddress);
75+
}
76+
}
77+
78+
@@ -1139,7 +1143,7 @@ bool ggml_metal_buffer_is_shared(ggml_metal_buffer_t buf) {
79+
80+
void ggml_metal_buffer_memset_tensor(ggml_metal_buffer_t buf, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
81+
if (buf->is_shared) {
82+
- memset((char *)tensor->data + offset, value, size);
83+
+ memset((char *) tensor->data + offset, value, size);
84+
return;
85+
}
86+
87+
@@ -1168,7 +1172,7 @@ void ggml_metal_buffer_memset_tensor(ggml_metal_buffer_t buf, struct ggml_tensor
88+
89+
void ggml_metal_buffer_set_tensor(ggml_metal_buffer_t buf, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
90+
if (buf->is_shared) {
91+
- memcpy((char *)tensor->data + offset, data, size);
92+
+ memcpy((char *) tensor->data + offset, data, size);
93+
return;
94+
}
95+
96+
@@ -1223,7 +1227,7 @@ void ggml_metal_buffer_set_tensor(ggml_metal_buffer_t buf, struct ggml_tensor *
97+
98+
void ggml_metal_buffer_get_tensor(ggml_metal_buffer_t buf, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
99+
if (buf->is_shared) {
100+
- memcpy(data, (const char *)tensor->data + offset, size);
101+
+ memcpy(data, (const char *) tensor->data + offset, size);
102+
return;
103+
}
104+
105+
106+
From 84e3d8d26961cca81de65b1790506121dda45bf5 Mon Sep 17 00:00:00 2001
107+
From: Georgi Gerganov <[email protected]>
108+
Date: Tue, 14 Oct 2025 14:44:01 +0300
109+
Subject: [PATCH 2/2] metal : fix rope kernels buffer check
110+
111+
---
112+
ggml/src/ggml-metal/ggml-metal-impl.h | 1 +
113+
ggml/src/ggml-metal/ggml-metal-ops.cpp | 1 +
114+
ggml/src/ggml-metal/ggml-metal.metal | 8 ++++----
115+
3 files changed, 6 insertions(+), 4 deletions(-)
116+
117+
diff --git a/ggml/src/ggml-metal/ggml-metal-impl.h b/ggml/src/ggml-metal/ggml-metal-impl.h
118+
index a448c14f66b63..fa2d82cefb40e 100644
119+
--- a/ggml/src/ggml-metal/ggml-metal-impl.h
120+
+++ b/ggml/src/ggml-metal/ggml-metal-impl.h
121+
@@ -251,6 +251,7 @@ typedef struct {
122+
int32_t sect_1;
123+
int32_t sect_2;
124+
int32_t sect_3;
125+
+ bool src2;
126+
} ggml_metal_kargs_rope;
127+
128+
typedef struct {
129+
diff --git a/ggml/src/ggml-metal/ggml-metal-ops.cpp b/ggml/src/ggml-metal/ggml-metal-ops.cpp
130+
index a61ea8fb5a7b3..784b7b77851e6 100644
131+
--- a/ggml/src/ggml-metal/ggml-metal-ops.cpp
132+
+++ b/ggml/src/ggml-metal/ggml-metal-ops.cpp
133+
@@ -2969,6 +2969,7 @@ int ggml_metal_op_rope(ggml_metal_op_t ctx, int idx) {
134+
/* sect_1 =*/ sect_1,
135+
/* sect_2 =*/ sect_2,
136+
/* sect_3 =*/ sect_3,
137+
+ /* src2 =*/ op->src[2] != nullptr,
138+
};
139+
140+
ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_rope(lib, op);
141+
diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal
142+
index 1029cf8f9a3ab..6d39ddcc634ef 100644
143+
--- a/ggml/src/ggml-metal/ggml-metal.metal
144+
+++ b/ggml/src/ggml-metal/ggml-metal.metal
145+
@@ -3748,7 +3748,7 @@ kernel void kernel_rope_norm(
146+
147+
const float theta = theta_base * pow(args.freq_base, inv_ndims*i0);
148+
149+
- const float freq_factor = src2 != src0 ? ((device const float *) src2)[ic] : 1.0f;
150+
+ const float freq_factor = args.src2 ? ((device const float *) src2)[ic] : 1.0f;
151+
152+
rope_yarn(theta/freq_factor, args.freq_scale, corr_dims, i0, args.ext_factor, args.attn_factor, &cos_theta, &sin_theta);
153+
154+
@@ -3801,7 +3801,7 @@ kernel void kernel_rope_neox(
155+
156+
const float theta = theta_base * pow(args.freq_base, inv_ndims*i0);
157+
158+
- const float freq_factor = src2 != src0 ? ((device const float *) src2)[ic] : 1.0f;
159+
+ const float freq_factor = args.src2 ? ((device const float *) src2)[ic] : 1.0f;
160+
161+
rope_yarn(theta/freq_factor, args.freq_scale, corr_dims, i0, args.ext_factor, args.attn_factor, &cos_theta, &sin_theta);
162+
163+
@@ -3872,7 +3872,7 @@ kernel void kernel_rope_multi(
164+
165+
const float theta = theta_base * pow(args.freq_base, inv_ndims*i0);
166+
167+
- const float freq_factor = src2 != src0 ? ((device const float *) src2)[ic] : 1.0f;
168+
+ const float freq_factor = args.src2 ? ((device const float *) src2)[ic] : 1.0f;
169+
170+
rope_yarn(theta/freq_factor, args.freq_scale, corr_dims, i0, args.ext_factor, args.attn_factor, &cos_theta, &sin_theta);
171+
172+
@@ -3939,7 +3939,7 @@ kernel void kernel_rope_vision(
173+
const float theta = theta_base * pow(args.freq_base, 2.0f * inv_ndims * p);
174+
// end of mrope
175+
176+
- const float freq_factor = src2 != src0 ? ((device const float *) src2)[ic] : 1.0f;
177+
+ const float freq_factor = args.src2 ? ((device const float *) src2)[ic] : 1.0f;
178+
179+
rope_yarn(theta/freq_factor, args.freq_scale, corr_dims, i0, args.ext_factor, args.attn_factor, &cos_theta, &sin_theta);
180+

recipe/recipe.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ source:
1111
url: https://github.com/ggml-org/${{ name }}/archive/b${{ version | split(".") | list | last }}.tar.gz
1212
sha256: bfe625422c8fa74cf12d1d6aff8bdbbe61c86647de1615c2e7b6f0cde4804e18
1313
patches:
14-
- 0001-Change-gpuAddress-for-contents.patch
14+
# See: https://github.com/ggml-org/llama.cpp/pull/16576/
15+
- 16576.patch
1516

1617
build:
1718
number: ${{ build }}

0 commit comments

Comments
 (0)