@@ -386,8 +386,19 @@ ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs
386
386
for (int i = 0 ; i < n_bufs ; i ++ ) {
387
387
galloc -> bufts [i ] = bufts [i ];
388
388
galloc -> buffers [i ] = NULL ;
389
- size_t alignment = ggml_backend_buft_get_alignment (bufts [i ]);
390
- galloc -> buf_tallocs [i ] = ggml_dyn_tallocr_new (alignment );
389
+
390
+ // check if the same buffer type is used multiple times and reuse the same allocator
391
+ for (int j = 0 ; j < i ; j ++ ) {
392
+ if (bufts [i ] == bufts [j ]) {
393
+ galloc -> buf_tallocs [i ] = galloc -> buf_tallocs [j ];
394
+ break ;
395
+ }
396
+ }
397
+
398
+ if (galloc -> buf_tallocs [i ] == NULL ) {
399
+ size_t alignment = ggml_backend_buft_get_alignment (bufts [i ]);
400
+ galloc -> buf_tallocs [i ] = ggml_dyn_tallocr_new (alignment );
401
+ }
391
402
}
392
403
galloc -> n_buffers = n_bufs ;
393
404
@@ -405,10 +416,30 @@ void ggml_gallocr_free(ggml_gallocr_t galloc) {
405
416
406
417
for (int i = 0 ; i < galloc -> n_buffers ; i ++ ) {
407
418
if (galloc -> buffers != NULL ) {
408
- ggml_backend_buffer_free (galloc -> buffers [i ]);
419
+ // skip if already freed
420
+ bool freed = false;
421
+ for (int j = 0 ; j < i ; j ++ ) {
422
+ if (galloc -> buffers [j ] == galloc -> buffers [i ]) {
423
+ freed = true;
424
+ break ;
425
+ }
426
+ }
427
+ if (!freed ) {
428
+ ggml_backend_buffer_free (galloc -> buffers [i ]);
429
+ }
409
430
}
410
431
if (galloc -> buf_tallocs != NULL ) {
411
- ggml_dyn_tallocr_free (galloc -> buf_tallocs [i ]);
432
+ // skip if already freed
433
+ bool freed = false;
434
+ for (int j = 0 ; j < i ; j ++ ) {
435
+ if (galloc -> buf_tallocs [j ] == galloc -> buf_tallocs [i ]) {
436
+ freed = true;
437
+ break ;
438
+ }
439
+ }
440
+ if (!freed ) {
441
+ ggml_dyn_tallocr_free (galloc -> buf_tallocs [i ]);
442
+ }
412
443
}
413
444
}
414
445
@@ -723,6 +754,14 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
723
754
724
755
// reallocate buffers if needed
725
756
for (int i = 0 ; i < galloc -> n_buffers ; i ++ ) {
757
+ // if the buffer type is used multiple times, we reuse the same buffer
758
+ for (int j = 0 ; j < i ; j ++ ) {
759
+ if (galloc -> buf_tallocs [j ] == galloc -> buf_tallocs [i ]) {
760
+ galloc -> buffers [i ] = galloc -> buffers [j ];
761
+ break ;
762
+ }
763
+ }
764
+
726
765
size_t cur_size = galloc -> buffers [i ] ? ggml_backend_buffer_get_size (galloc -> buffers [i ]) : 0 ;
727
766
size_t new_size = ggml_dyn_tallocr_max_size (galloc -> buf_tallocs [i ]);
728
767
@@ -731,6 +770,7 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
731
770
#ifndef NDEBUG
732
771
fprintf (stderr , "%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n" , __func__ , ggml_backend_buft_name (galloc -> bufts [i ]), cur_size / 1024.0 / 1024.0 , new_size / 1024.0 / 1024.0 );
733
772
#endif
773
+
734
774
ggml_backend_buffer_free (galloc -> buffers [i ]);
735
775
galloc -> buffers [i ] = ggml_backend_buft_alloc_buffer (galloc -> bufts [i ], new_size );
736
776
if (galloc -> buffers [i ] == NULL ) {
@@ -879,6 +919,15 @@ size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) {
879
919
if (galloc -> buffers [buffer_id ] == NULL ) {
880
920
return 0 ;
881
921
}
922
+
923
+ for (int i = 0 ; i < buffer_id ; i ++ ) {
924
+ if (galloc -> buffers [i ] == galloc -> buffers [buffer_id ]) {
925
+ // this buffer is the same as a previous one due to the same buffer type being used multiple times
926
+ // only return the buffer size the first time it appears to avoid double counting
927
+ return 0 ;
928
+ }
929
+ }
930
+
882
931
return ggml_backend_buffer_get_size (galloc -> buffers [buffer_id ]);
883
932
}
884
933
0 commit comments