 
 #ifdef GGML_USE_CUDA
 #include "ggml-cuda.h"
+// #include <cuda_runtime.h>
 #endif
 
 #ifdef GGML_USE_METAL
@@ -36,8 +37,10 @@ struct test_model {
 
 void load_model(test_model & model, bool use_gpu = false) {
     // create data
-    int KW = 3, KH = 3, IC = 32, OC = 64;
-    int IW = 28, IH = 40, N = 1;
+    int KW = 3, KH = 3, IC = 256, OC = 256;
+    int IW = 832, IH = 1216, N = 1;
+
+    printf("input: IC = %d, OC = %d, IW = %d, IH = %d \n", IC, OC, IW, IH);
 
     // Initialize adata
     std::vector<float> adata(KW * KH * IC * OC);
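With a 3x3 kernel at stride 1 and padding 1, the output keeps the input's spatial size, so bumping the shapes from 28x40x32 to 832x1216x256 turns the toy correctness check into a benchmark-scale workload. A rough cost estimate for the direct convolution at these dimensions (standalone illustration; the constants mirror the values above, nothing here is part of the test):

#include <cstdio>
#include <cstdint>

int main() {
    // dimensions from load_model above
    const int64_t KW = 3, KH = 3, IC = 256, OC = 256, IW = 832, IH = 1216, N = 1;
    // with s=1, p=1, d=1 and a 3x3 kernel, the output size equals the input size
    const int64_t OW = IW, OH = IH;
    // one multiply-accumulate per kernel tap per output element
    const int64_t macs = N * OC * OH * OW * IC * KH * KW;
    printf("direct conv2d: %lld MACs (~%.0f GFLOPs per pass)\n",
           (long long) macs, 2.0 * macs / 1e9);   // ~597 GMACs, ~1194 GFLOPs
    return 0;
}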
@@ -135,7 +138,7 @@ void load_model(test_model & model, bool use_gpu = false) {
     }
 }
 
-struct ggml_cgraph * build_graph(const test_model& model) {
+struct ggml_cgraph * build_graph_0(const test_model& model) {
     static size_t buf_size = ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead();
     static std::vector<uint8_t> buf(buf_size);
 
@@ -163,21 +166,104 @@ struct ggml_cgraph * build_graph(const test_model& model) {
     struct ggml_tensor * conv2d_res = ggml_conv_2d(ctx0, model.a, model.b, s0, s1, p0, p1, d0, d1);
     ggml_set_name(conv2d_res, "conv2d_res");
     ggml_build_forward_expand(gf, conv2d_res);
-    int64_t *ne = conv2d_res->ne;
-    printf("conv2d: (%zu, %zu, %zu, %zu) \n", ne[0], ne[1], ne[2], ne[3]);
+    // int64_t *ne = conv2d_res->ne;
+    // printf("conv2d: (%zu, %zu, %zu, %zu) \n", ne[0], ne[1], ne[2], ne[3]);
+
+    // struct ggml_tensor* wino_res = ggml_conv_2d_3x3(ctx0, model.a, model.b);
+    // ggml_set_name(wino_res, "wino_res");
+    // ggml_build_forward_expand(gf, wino_res);
+    // ne = wino_res->ne;
+    // printf("wino: (%zu, %zu, %zu, %zu) \n", ne[0], ne[1], ne[2], ne[3]);
+    ggml_free(ctx0);
+    return gf;
+}
+
+struct ggml_cgraph * build_graph_1(const test_model& model) {
+    static size_t buf_size = ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead();
+    static std::vector<uint8_t> buf(buf_size);
+
+    struct ggml_init_params params0 = {
+        /*.mem_size   =*/ buf_size,
+        /*.mem_buffer =*/ buf.data(),
+        /*.no_alloc   =*/ true, // the tensors will be allocated later by ggml_gallocr_alloc_graph()
+    };
+
+    // create a temporary context to build the graph
+    struct ggml_context * ctx0 = ggml_init(params0);
+
+    struct ggml_cgraph * gf = ggml_new_graph(ctx0);
+
+    int s0 = 1;
+    int s1 = 1;
+    int p0 = 1;
+    int p1 = 1;
+    int d0 = 1;
+    int d1 = 1;
+
+    // recalculated in a separate graph to avoid fragmentation
+    // struct ggml_tensor* conv2d_res = ggml_conv_2d(ctx0, model.a, model.b, s0, s1, p0, p1, d0, d1);
+    // ggml_set_name(conv2d_res, "conv2d_res");
+    // ggml_build_forward_expand(gf, conv2d_res);
+    // int64_t *ne = conv2d_res->ne;
+    // printf("conv2d: (%zu, %zu, %zu, %zu) \n", ne[0], ne[1], ne[2], ne[3]);
 
 
     struct ggml_tensor * wino_res = ggml_conv_2d_3x3(ctx0, model.a, model.b);
     ggml_set_name(wino_res, "wino_res");
     ggml_build_forward_expand(gf, wino_res);
-    ne = wino_res->ne;
-    printf("wino: (%zu, %zu, %zu, %zu) \n", ne[0], ne[1], ne[2], ne[3]);
+    // ne = wino_res->ne;
+    // printf("wino: (%zu, %zu, %zu, %zu) \n", ne[0], ne[1], ne[2], ne[3]);
     ggml_free(ctx0);
     return gf;
 }
 
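Only ggml_conv_2d_3x3 is live in this second graph; the wino_res naming suggests a Winograd implementation of the 3x3 convolution. For background (standard Winograd F(2x2, 3x3) arithmetic, not something this commit documents): each 2x2 output tile is computed from a 4x4 input tile in the transformed domain, replacing 36 multiplies with 16.

// Winograd F(2x2,3x3) multiply count per 2x2 output tile, per (IC, OC) pair:
//   direct:   2*2 outputs x 3*3 taps   = 36 multiplies
//   Winograd: 4*4 elementwise products = 16 multiplies
//   saving:   36 / 16                  = 2.25x fewer multiplies
// (transform overhead and memory traffic eat into this in practice)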
-struct ggml_cgraph * compute_graph(const test_model & model, ggml_gallocr_t allocr) {
-    struct ggml_cgraph * gf = build_graph(model);
+struct ggml_cgraph * compute_graph_0(const test_model & model, ggml_gallocr_t allocr) {
+    struct ggml_cgraph * gf = build_graph_0(model);
+
+    // allocate tensors
+    ggml_gallocr_alloc_graph(allocr, gf);
+    int n_threads = 1;
+
+    if (ggml_backend_is_cpu(model.backend)) {
+        ggml_backend_cpu_set_n_threads(model.backend, n_threads);
+    }
+
+#ifdef GGML_USE_METAL
+    if (ggml_backend_is_metal(model.backend)) {
+        ggml_backend_metal_set_n_cb(model.backend, n_threads);
+    }
+#endif
+
+    int iterations = 20;
+
+    // warm-up pass, excluded from the timing below
+    ggml_backend_graph_compute(model.backend, gf);
+
+    ggml_backend_synchronize(model.backend);
+
+    int64_t start_time = ggml_time_us();
+
+    for (int iter = 0; iter < iterations; iter++) {
+        ggml_backend_graph_compute(model.backend, gf);
+    }
+
+    ggml_backend_synchronize(model.backend);
+    int64_t end_time = ggml_time_us();
+    double time_us = end_time - start_time;
+
+    time_us = time_us/iterations;
+    printf("Taking %f ms\n", time_us/1000);
+
+    // ggml_graph_print(gf);
+
+    return gf;
+}
+
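The timing discipline in compute_graph_0 is the usual one for possibly-asynchronous backends: one untimed warm-up compute, a synchronize before starting the clock, an averaged loop, then a final synchronize so queued work has actually finished before ggml_time_us() is read again. Since compute_graph_1 below repeats the same code, the pattern could be factored into a helper along these lines (hypothetical refactor, not part of the commit):

// Hypothetical helper: time an already-allocated graph, returning ms/iteration.
static double benchmark_graph_ms(ggml_backend_t backend, struct ggml_cgraph * gf, int iterations) {
    ggml_backend_graph_compute(backend, gf);    // warm-up, not timed
    ggml_backend_synchronize(backend);

    const int64_t t0 = ggml_time_us();
    for (int i = 0; i < iterations; i++) {
        ggml_backend_graph_compute(backend, gf);
    }
    ggml_backend_synchronize(backend);          // drain queued work before reading the clock
    const int64_t t1 = ggml_time_us();

    return (double)(t1 - t0) / iterations / 1000.0;
}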
+struct ggml_cgraph * compute_graph_1(const test_model & model, ggml_gallocr_t allocr) {
+    struct ggml_cgraph * gf = build_graph_1(model);
 
     // allocate tensors
     ggml_gallocr_alloc_graph(allocr, gf);
@@ -193,8 +279,25 @@ struct ggml_cgraph * compute_graph(const test_model & model, ggml_gallocr_t allo
     }
 #endif
 
+    int iterations = 20;
+
     ggml_backend_graph_compute(model.backend, gf);
 
+    ggml_backend_synchronize(model.backend);
+
+    int64_t start_time = ggml_time_us();
+
+    for (int iter = 0; iter < iterations; iter++) {
+        ggml_backend_graph_compute(model.backend, gf);
+    }
+
+    ggml_backend_synchronize(model.backend);
+    int64_t end_time = ggml_time_us();
+    double time_us = end_time - start_time;
+
+    time_us = time_us/iterations;
+    printf("Taking %f ms\n", time_us/1000);
+
     // ggml_graph_print(gf);
 
     return gf;
@@ -213,24 +316,58 @@ int main(void)
         allocr = ggml_gallocr_new(ggml_backend_get_default_buffer_type(model.backend));
 
         // create the worst case graph for memory usage estimation
-        struct ggml_cgraph * gf = build_graph(model);
+        struct ggml_cgraph * gf = build_graph_0(model);
+
+        // compute the required memory
+        ggml_gallocr_reserve(allocr, gf);
+        size_t mem_size = ggml_gallocr_get_buffer_size(allocr, 0);
+        fprintf(stderr, "%s: compute buffer size: %.2f MB\n", __func__, mem_size/1024.0f/1024.0f);
+    }
+
+    struct ggml_cgraph * gf_res_0 = NULL;
+
+    gf_res_0 = compute_graph_0(model, allocr);
+
+    // ggml_gallocr_t allocr = NULL;
+
+    {
+        allocr = ggml_gallocr_new(ggml_backend_get_default_buffer_type(model.backend));
+
+        // create the worst case graph for memory usage estimation
+        struct ggml_cgraph * gf = build_graph_1(model);
 
         // compute the required memory
         ggml_gallocr_reserve(allocr, gf);
         size_t mem_size = ggml_gallocr_get_buffer_size(allocr, 0);
         fprintf(stderr, "%s: compute buffer size: %.2f MB\n", __func__, mem_size/1024.0f/1024.0f);
     }
 
-    struct ggml_cgraph * gf_res = compute_graph(model, allocr);
+    struct ggml_cgraph * gf_res_1 = NULL;
+
+    gf_res_1 = compute_graph_1(model, allocr);
 
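One small wrinkle: allocr is reassigned for the second measurement, so the allocator created for graph 0 (and the compute buffer it reserved) is never released. It also cannot simply be freed at the point of reassignment, because gf_res_0's tensors, including conv2d_res, live in that buffer. If the leak matters, ggml-alloc provides ggml_gallocr_free for use once the graph-0 results are no longer needed (a sketch; allocr_0 is a hypothetical second handle kept for the graph-0 allocator):

// only after conv2d_res has been read back / is no longer needed:
ggml_gallocr_free(allocr_0);   // hypothetical handle retained for the graph-0 allocator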
     struct ggml_tensor * wino_res = NULL;
     struct ggml_tensor * conv2d_res = NULL;
 
-    for (int i = 0; i < ggml_graph_n_nodes(gf_res); ++i) {
-        if (strcmp(ggml_get_name(ggml_graph_node(gf_res, i)), "wino_res") == 0) {
-            wino_res = ggml_graph_node(gf_res, i);
-        } else if (strcmp(ggml_get_name(ggml_graph_node(gf_res, i)), "conv2d_res") == 0) {
-            conv2d_res = ggml_graph_node(gf_res, i);
+    for (int i = 0; i < ggml_graph_n_nodes(gf_res_0); ++i) {
+        if (strcmp(ggml_get_name(ggml_graph_node(gf_res_0, i)), "wino_res") == 0) {
+            wino_res = ggml_graph_node(gf_res_0, i);
+        } else if (strcmp(ggml_get_name(ggml_graph_node(gf_res_0, i)), "conv2d_res") == 0) {
+            conv2d_res = ggml_graph_node(gf_res_0, i);
+        }
+    }
+
+    for (int i = 0; i < ggml_graph_n_nodes(gf_res_1); ++i) {
+        if (strcmp(ggml_get_name(ggml_graph_node(gf_res_1, i)), "wino_res") == 0) {
+            wino_res = ggml_graph_node(gf_res_1, i);
+        } else if (strcmp(ggml_get_name(ggml_graph_node(gf_res_1, i)), "conv2d_res") == 0) {
+            conv2d_res = ggml_graph_node(gf_res_1, i);
         }
     }
 
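Both result graphs are scanned with the same name-matching loop; conv2d_res is found in graph 0 and wino_res in graph 1. The lookup could be factored into a small helper (hypothetical refactor, not in the commit):

// Hypothetical helper: first node with the given name, or NULL if absent.
static struct ggml_tensor * find_node(struct ggml_cgraph * g, const char * name) {
    for (int i = 0; i < ggml_graph_n_nodes(g); ++i) {
        struct ggml_tensor * node = ggml_graph_node(g, i);
        if (strcmp(ggml_get_name(node), name) == 0) {
            return node;
        }
    }
    return NULL;
}

// usage:
//   conv2d_res = find_node(gf_res_0, "conv2d_res");
//   wino_res   = find_node(gf_res_1, "wino_res");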
@@ -245,15 +382,15 @@ int main(void)
 
     bool passed = true;
     // for(int i = 0; i < ggml_nelements(wino_res); i++) {
-    for(int i = 0; i < 3*28; i++) {
-        float diff = fabs(conv2d_data[i] - wino_data[i]);
-        // if(diff > 1.e-4) {
-        printf("(%f, %f, %f, %d) \n",
-            conv2d_data[i],
-            wino_data[i], diff, i);
-        //     break;
-        // }
-    }
+    // for(int i = 0; i < 3*28; i++) {
+    //     float diff = fabs(conv2d_data[i] - wino_data[i]);
+    //     // if(diff > 1.e-4) {
+    //     printf("(%f, %f, %f, %d) \n",
+    //         conv2d_data[i],
+    //         wino_data[i], diff, i);
+    //     // break;
+    //     // }
+    // }
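With the element-wise comparison commented out, passed is never updated and the program only reports timings; nothing verifies that the Winograd path matches ggml_conv_2d. A tolerance-based check over all elements could be restored along these lines (a sketch reusing conv2d_data, wino_data, and the 1e-4 threshold from the commented-out code):

// sketch: compare the two outputs element by element
for (int i = 0; i < ggml_nelements(wino_res); i++) {
    const float diff = fabsf(conv2d_data[i] - wino_data[i]);
    if (diff > 1e-4f) {
        printf("mismatch at %d: conv2d=%f wino=%f diff=%f\n",
               i, conv2d_data[i], wino_data[i], diff);
        passed = false;
        break;
    }
}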