Skip to content

Commit 845fa20

Browse files
committed
alloc : reuse same buffer when the same buffer type is used multiple times
1 parent 77f88e3 commit 845fa20

File tree

2 files changed

+53
-7
lines changed

2 files changed

+53
-7
lines changed

ggml-alloc.c

Lines changed: 53 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -386,8 +386,19 @@ ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs
386386
for (int i = 0; i < n_bufs; i++) {
387387
galloc->bufts[i] = bufts[i];
388388
galloc->buffers[i] = NULL;
389-
size_t alignment = ggml_backend_buft_get_alignment(bufts[i]);
390-
galloc->buf_tallocs[i] = ggml_dyn_tallocr_new(alignment);
389+
390+
// check if the same buffer type is used multiple times and reuse the same allocator
391+
for (int j = 0; j < i; j++) {
392+
if (bufts[i] == bufts[j]) {
393+
galloc->buf_tallocs[i] = galloc->buf_tallocs[j];
394+
break;
395+
}
396+
}
397+
398+
if (galloc->buf_tallocs[i] == NULL) {
399+
size_t alignment = ggml_backend_buft_get_alignment(bufts[i]);
400+
galloc->buf_tallocs[i] = ggml_dyn_tallocr_new(alignment);
401+
}
391402
}
392403
galloc->n_buffers = n_bufs;
393404

@@ -405,10 +416,30 @@ void ggml_gallocr_free(ggml_gallocr_t galloc) {
405416

406417
for (int i = 0; i < galloc->n_buffers; i++) {
407418
if (galloc->buffers != NULL) {
408-
ggml_backend_buffer_free(galloc->buffers[i]);
419+
// skip if already freed
420+
bool freed = false;
421+
for (int j = 0; j < i; j++) {
422+
if (galloc->buffers[j] == galloc->buffers[i]) {
423+
freed = true;
424+
break;
425+
}
426+
}
427+
if (!freed) {
428+
ggml_backend_buffer_free(galloc->buffers[i]);
429+
}
409430
}
410431
if (galloc->buf_tallocs != NULL) {
411-
ggml_dyn_tallocr_free(galloc->buf_tallocs[i]);
432+
// skip if already freed
433+
bool freed = false;
434+
for (int j = 0; j < i; j++) {
435+
if (galloc->buf_tallocs[j] == galloc->buf_tallocs[i]) {
436+
freed = true;
437+
break;
438+
}
439+
}
440+
if (!freed) {
441+
ggml_dyn_tallocr_free(galloc->buf_tallocs[i]);
442+
}
412443
}
413444
}
414445

@@ -723,6 +754,14 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
723754

724755
// reallocate buffers if needed
725756
for (int i = 0; i < galloc->n_buffers; i++) {
757+
// if the buffer type is used multiple times, we reuse the same buffer
758+
for (int j = 0; j < i; j++) {
759+
if (galloc->buf_tallocs[j] == galloc->buf_tallocs[i]) {
760+
galloc->buffers[i] = galloc->buffers[j];
761+
break;
762+
}
763+
}
764+
726765
size_t cur_size = galloc->buffers[i] ? ggml_backend_buffer_get_size(galloc->buffers[i]) : 0;
727766
size_t new_size = ggml_dyn_tallocr_max_size(galloc->buf_tallocs[i]);
728767

@@ -731,6 +770,7 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
731770
#ifndef NDEBUG
732771
fprintf(stderr, "%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), cur_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
733772
#endif
773+
734774
ggml_backend_buffer_free(galloc->buffers[i]);
735775
galloc->buffers[i] = ggml_backend_buft_alloc_buffer(galloc->bufts[i], new_size);
736776
if (galloc->buffers[i] == NULL) {
@@ -879,6 +919,15 @@ size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) {
879919
if (galloc->buffers[buffer_id] == NULL) {
880920
return 0;
881921
}
922+
923+
for (int i = 0; i < buffer_id; i++) {
924+
if (galloc->buffers[i] == galloc->buffers[buffer_id]) {
925+
// this buffer is the same as a previous one due to the same buffer type being used multiple times
926+
// only return the buffer size the first time it appears to avoid double counting
927+
return 0;
928+
}
929+
}
930+
882931
return ggml_backend_buffer_get_size(galloc->buffers[buffer_id]);
883932
}
884933

ggml-blas.c

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -144,9 +144,6 @@ static void ggml_backend_blas_out_prod(struct ggml_backend_blas_context * ctx, s
144144
// GGML_ASSERT(nb1 <= nb2);
145145
// GGML_ASSERT(nb2 <= nb3);
146146

147-
// nb01 >= nb00 - src0 is not transposed
148-
// compute by src0 rows
149-
150147
// Arguments to ggml_compute_forward_out_prod (expressed as major,minor)
151148
// src0: (k,n)
152149
// src1: (k,m)

0 commit comments

Comments
 (0)