@@ -386,8 +386,19 @@ ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs
386386 for (int i = 0 ; i < n_bufs ; i ++ ) {
387387 galloc -> bufts [i ] = bufts [i ];
388388 galloc -> buffers [i ] = NULL ;
389- size_t alignment = ggml_backend_buft_get_alignment (bufts [i ]);
390- galloc -> buf_tallocs [i ] = ggml_dyn_tallocr_new (alignment );
389+
390+ // check if the same buffer type is used multiple times and reuse the same allocator
391+ for (int j = 0 ; j < i ; j ++ ) {
392+ if (bufts [i ] == bufts [j ]) {
393+ galloc -> buf_tallocs [i ] = galloc -> buf_tallocs [j ];
394+ break ;
395+ }
396+ }
397+
398+ if (galloc -> buf_tallocs [i ] == NULL ) {
399+ size_t alignment = ggml_backend_buft_get_alignment (bufts [i ]);
400+ galloc -> buf_tallocs [i ] = ggml_dyn_tallocr_new (alignment );
401+ }
391402 }
392403 galloc -> n_buffers = n_bufs ;
393404
@@ -405,10 +416,30 @@ void ggml_gallocr_free(ggml_gallocr_t galloc) {
405416
406417 for (int i = 0 ; i < galloc -> n_buffers ; i ++ ) {
407418 if (galloc -> buffers != NULL ) {
408- ggml_backend_buffer_free (galloc -> buffers [i ]);
419+ // skip if already freed
420+ bool freed = false;
421+ for (int j = 0 ; j < i ; j ++ ) {
422+ if (galloc -> buffers [j ] == galloc -> buffers [i ]) {
423+ freed = true;
424+ break ;
425+ }
426+ }
427+ if (!freed ) {
428+ ggml_backend_buffer_free (galloc -> buffers [i ]);
429+ }
409430 }
410431 if (galloc -> buf_tallocs != NULL ) {
411- ggml_dyn_tallocr_free (galloc -> buf_tallocs [i ]);
432+ // skip if already freed
433+ bool freed = false;
434+ for (int j = 0 ; j < i ; j ++ ) {
435+ if (galloc -> buf_tallocs [j ] == galloc -> buf_tallocs [i ]) {
436+ freed = true;
437+ break ;
438+ }
439+ }
440+ if (!freed ) {
441+ ggml_dyn_tallocr_free (galloc -> buf_tallocs [i ]);
442+ }
412443 }
413444 }
414445
@@ -723,6 +754,14 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
723754
724755 // reallocate buffers if needed
725756 for (int i = 0 ; i < galloc -> n_buffers ; i ++ ) {
757+ // if the buffer type is used multiple times, we reuse the same buffer
758+ for (int j = 0 ; j < i ; j ++ ) {
759+ if (galloc -> buf_tallocs [j ] == galloc -> buf_tallocs [i ]) {
760+ galloc -> buffers [i ] = galloc -> buffers [j ];
761+ break ;
762+ }
763+ }
764+
726765 size_t cur_size = galloc -> buffers [i ] ? ggml_backend_buffer_get_size (galloc -> buffers [i ]) : 0 ;
727766 size_t new_size = ggml_dyn_tallocr_max_size (galloc -> buf_tallocs [i ]);
728767
@@ -731,6 +770,7 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
731770#ifndef NDEBUG
732771 fprintf (stderr , "%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n" , __func__ , ggml_backend_buft_name (galloc -> bufts [i ]), cur_size / 1024.0 / 1024.0 , new_size / 1024.0 / 1024.0 );
733772#endif
773+
734774 ggml_backend_buffer_free (galloc -> buffers [i ]);
735775 galloc -> buffers [i ] = ggml_backend_buft_alloc_buffer (galloc -> bufts [i ], new_size );
736776 if (galloc -> buffers [i ] == NULL ) {
@@ -879,6 +919,15 @@ size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) {
879919 if (galloc -> buffers [buffer_id ] == NULL ) {
880920 return 0 ;
881921 }
922+
923+ for (int i = 0 ; i < buffer_id ; i ++ ) {
924+ if (galloc -> buffers [i ] == galloc -> buffers [buffer_id ]) {
925+ // this buffer is the same as a previous one due to the same buffer type being used multiple times
926+ // only return the buffer size the first time it appears to avoid double counting
927+ return 0 ;
928+ }
929+ }
930+
882931 return ggml_backend_buffer_get_size (galloc -> buffers [buffer_id ]);
883932}
884933
0 commit comments