@@ -339,6 +339,7 @@ struct hash_node {
339
339
};
340
340
341
341
// Reserved placement of a single tensor inside one of the allocator's buffers.
// Each record carries its own buffer index so that a node's dst and each of
// its sources can be described independently.
struct tensor_alloc {
    int    buffer_id; // index into the allocator's buffer arrays; -1 when the tensor is a view or pre-allocated
    size_t offset;    // offset taken from the tensor's hash node; SIZE_MAX when the tensor is a view or pre-allocated
    size_t size_max;  // 0 = pre-allocated, unused, or view
};
@@ -349,7 +350,6 @@ struct leaf_alloc {
349
350
};
350
351
351
352
struct node_alloc {
352
- int buffer_id ;
353
353
struct tensor_alloc dst ;
354
354
struct tensor_alloc src [GGML_MAX_SRC ];
355
355
};
@@ -511,17 +511,18 @@ static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor
511
511
}
512
512
}
513
513
514
- static void ggml_gallocr_free_node (ggml_gallocr_t galloc , struct ggml_tensor * node , int buffer_id ) {
514
+ static void ggml_gallocr_free_node (ggml_gallocr_t galloc , struct ggml_tensor * node ) {
515
515
// graph outputs are never freed
516
516
if (node -> flags & GGML_TENSOR_FLAG_OUTPUT ) {
517
517
AT_PRINTF ("not freeing output %s\n" , node -> name );
518
518
return ;
519
519
}
520
520
521
- struct ggml_dyn_tallocr * alloc = galloc -> buf_tallocs [buffer_id ];
522
- ggml_backend_buffer_type_t buft = galloc -> bufts [buffer_id ];
523
521
struct hash_node * hn = ggml_gallocr_hash_get (galloc , node );
524
522
size_t offset = hn -> offset ;
523
+ int buffer_id = hn -> buffer_id ;
524
+ struct ggml_dyn_tallocr * alloc = galloc -> buf_tallocs [buffer_id ];
525
+ ggml_backend_buffer_type_t buft = galloc -> bufts [buffer_id ];
525
526
size_t size = ggml_backend_buft_get_alloc_size (buft , node );
526
527
ggml_dyn_tallocr_free_tensor (alloc , offset , size , node );
527
528
hn -> allocated = false;
@@ -626,11 +627,11 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
626
627
AT_PRINTF ("view_src %s: %d children, %d views\n" ,
627
628
view_src -> name , view_src_hn -> n_children , view_src_hn -> n_views );
628
629
if (view_src_hn -> n_views == 0 && view_src_hn -> n_children == 0 && view_src_hn -> allocated ) {
629
- ggml_gallocr_free_node (galloc , view_src , buffer_id );
630
+ ggml_gallocr_free_node (galloc , view_src );
630
631
}
631
632
}
632
633
else if (p_hn -> allocated ) {
633
- ggml_gallocr_free_node (galloc , parent , buffer_id );
634
+ ggml_gallocr_free_node (galloc , parent );
634
635
}
635
636
}
636
637
AT_PRINTF ("\n" );
@@ -674,22 +675,26 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
674
675
for (int i = 0 ; i < graph -> n_nodes ; i ++ ) {
675
676
struct ggml_tensor * node = graph -> nodes [i ];
676
677
struct node_alloc * node_alloc = & galloc -> node_allocs [i ];
677
- node_alloc -> buffer_id = get_node_buffer_id (node_buffer_ids , i );
678
+ // node_alloc->buffer_id = get_node_buffer_id(node_buffer_ids, i);
678
679
if (node -> view_src || node -> data ) {
680
+ node_alloc -> dst .buffer_id = -1 ;
679
681
node_alloc -> dst .offset = SIZE_MAX ;
680
682
node_alloc -> dst .size_max = 0 ;
681
683
} else {
682
684
struct hash_node * hn = ggml_gallocr_hash_get (galloc , node );
683
- node_alloc -> dst .offset = hn -> offset ;
684
- node_alloc -> dst .size_max = ggml_backend_buft_get_alloc_size (galloc -> bufts [hn -> buffer_id ], node );
685
+ node_alloc -> dst .buffer_id = hn -> buffer_id ;
686
+ node_alloc -> dst .offset = hn -> offset ;
687
+ node_alloc -> dst .size_max = ggml_backend_buft_get_alloc_size (galloc -> bufts [hn -> buffer_id ], node );
685
688
}
686
689
for (int j = 0 ; j < GGML_MAX_SRC ; j ++ ) {
687
690
struct ggml_tensor * src = node -> src [j ];
688
691
if (!src || src -> view_src || src -> data ) {
692
+ node_alloc -> src [j ].buffer_id = -1 ;
689
693
node_alloc -> src [j ].offset = SIZE_MAX ;
690
694
node_alloc -> src [j ].size_max = 0 ;
691
695
} else {
692
696
struct hash_node * hn = ggml_gallocr_hash_get (galloc , src );
697
+ node_alloc -> src [j ].buffer_id = hn -> buffer_id ;
693
698
node_alloc -> src [j ].offset = hn -> offset ;
694
699
node_alloc -> src [j ].size_max = ggml_backend_buft_get_alloc_size (galloc -> bufts [hn -> buffer_id ], src );
695
700
}
@@ -706,9 +711,11 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
706
711
struct hash_node * hn = ggml_gallocr_hash_get (galloc , leaf );
707
712
galloc -> leaf_allocs [i ].buffer_id = hn -> buffer_id ;
708
713
if (leaf -> view_src || leaf -> data ) {
714
+ galloc -> leaf_allocs [i ].leaf .buffer_id = -1 ;
709
715
galloc -> leaf_allocs [i ].leaf .offset = SIZE_MAX ;
710
716
galloc -> leaf_allocs [i ].leaf .size_max = 0 ;
711
717
} else {
718
+ galloc -> leaf_allocs [i ].leaf .buffer_id = hn -> buffer_id ;
712
719
galloc -> leaf_allocs [i ].leaf .offset = hn -> offset ;
713
720
galloc -> leaf_allocs [i ].leaf .size_max = ggml_backend_buft_get_alloc_size (galloc -> bufts [hn -> buffer_id ], leaf );
714
721
}
@@ -740,7 +747,8 @@ bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph *graph) {
740
747
return ggml_gallocr_reserve_n (galloc , graph , NULL , NULL );
741
748
}
742
749
743
- static void ggml_gallocr_init_tensor (ggml_gallocr_t galloc , struct ggml_tensor * tensor , int buffer_id , struct tensor_alloc * tensor_alloc ) {
750
+ static void ggml_gallocr_init_tensor (ggml_gallocr_t galloc , struct ggml_tensor * tensor , struct tensor_alloc * tensor_alloc ) {
751
+ int buffer_id = tensor_alloc -> buffer_id ;
744
752
assert (tensor -> data || tensor -> view_src || ggml_backend_buffer_get_alloc_size (galloc -> buffers [buffer_id ], tensor ) <= tensor_alloc -> size_max );
745
753
746
754
if (tensor -> view_src != NULL ) {
@@ -768,8 +776,8 @@ static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor *
768
776
}
769
777
}
770
778
771
- static bool ggml_gallocr_node_needs_realloc (ggml_gallocr_t galloc , struct ggml_tensor * node , struct node_alloc * nalloc , struct tensor_alloc * talloc ) {
772
- ggml_backend_buffer_type_t buft = galloc -> bufts [nalloc -> buffer_id ];
779
+ static bool ggml_gallocr_node_needs_realloc (ggml_gallocr_t galloc , struct ggml_tensor * node , struct tensor_alloc * talloc ) {
780
+ ggml_backend_buffer_type_t buft = talloc -> buffer_id != -1 ? galloc -> bufts [talloc -> buffer_id ] : NULL ;
773
781
size_t node_size = (node -> data || node -> view_src ) ? 0 : ggml_backend_buft_get_alloc_size (buft , node );
774
782
return talloc -> size_max >= node_size ;
775
783
}
@@ -793,7 +801,7 @@ static bool ggml_gallocr_needs_realloc(ggml_gallocr_t galloc, struct ggml_cgraph
793
801
struct ggml_tensor * node = graph -> nodes [i ];
794
802
struct node_alloc * node_alloc = & galloc -> node_allocs [i ];
795
803
796
- if (!ggml_gallocr_node_needs_realloc (galloc , node , node_alloc , & node_alloc -> dst )) {
804
+ if (!ggml_gallocr_node_needs_realloc (galloc , node , & node_alloc -> dst )) {
797
805
#ifndef NDEBUG
798
806
fprintf (stderr , "%s: node %s is not valid\n" , __func__ , node -> name );
799
807
#endif
@@ -805,7 +813,7 @@ static bool ggml_gallocr_needs_realloc(ggml_gallocr_t galloc, struct ggml_cgraph
805
813
if (src == NULL ) {
806
814
continue ;
807
815
}
808
- if (!ggml_gallocr_node_needs_realloc (galloc , src , node_alloc , & node_alloc -> src [j ])) {
816
+ if (!ggml_gallocr_node_needs_realloc (galloc , src , & node_alloc -> src [j ])) {
809
817
#ifndef NDEBUG
810
818
fprintf (stderr , "%s: src %d (%s) of node %s is not valid\n" , __func__ , j , src -> name , node -> name );
811
819
#endif
@@ -846,7 +854,7 @@ bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph)
846
854
for (int i = 0 ; i < graph -> n_leafs ; i ++ ) {
847
855
struct ggml_tensor * leaf = graph -> leafs [i ];
848
856
struct leaf_alloc * leaf_alloc = & galloc -> leaf_allocs [i ];
849
- ggml_gallocr_init_tensor (galloc , leaf , leaf_alloc -> buffer_id , & leaf_alloc -> leaf );
857
+ ggml_gallocr_init_tensor (galloc , leaf , & leaf_alloc -> leaf );
850
858
}
851
859
// nodes
852
860
for (int i = 0 ; i < graph -> n_nodes ; i ++ ) {
@@ -857,9 +865,9 @@ bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph)
857
865
if (src == NULL ) {
858
866
continue ;
859
867
}
860
- ggml_gallocr_init_tensor (galloc , src , node_alloc -> buffer_id , & node_alloc -> src [j ]);
868
+ ggml_gallocr_init_tensor (galloc , src , & node_alloc -> src [j ]);
861
869
}
862
- ggml_gallocr_init_tensor (galloc , node , node_alloc -> buffer_id , & node_alloc -> dst );
870
+ ggml_gallocr_init_tensor (galloc , node , & node_alloc -> dst );
863
871
}
864
872
865
873
return true;
0 commit comments