@@ -68,7 +68,7 @@ struct ggml_allocr {
68
68
size_t max_size ;
69
69
bool measure ;
70
70
int parse_seq [GGML_MAX_NODES ];
71
- bool has_parse_seq ;
71
+ int parse_seq_len ;
72
72
73
73
#ifdef GGML_ALLOCATOR_DEBUG
74
74
struct ggml_tensor * allocated_tensors [1024 ];
@@ -239,14 +239,10 @@ static void ggml_allocator_free_tensor(struct ggml_allocr * alloc, struct ggml_t
239
239
}
240
240
241
241
void ggml_allocr_set_parse_seq (struct ggml_allocr * alloc , const int * list , int n ) {
242
- int pos = 0 ;
243
242
for (int i = 0 ; i < n ; i ++ ) {
244
- if (list [i ] != -1 ) {
245
- alloc -> parse_seq [pos ] = list [i ];
246
- pos ++ ;
247
- }
243
+ alloc -> parse_seq [i ] = list [i ];
248
244
}
249
- alloc -> has_parse_seq = true ;
245
+ alloc -> parse_seq_len = n ;
250
246
}
251
247
252
248
void ggml_allocr_reset (struct ggml_allocr * alloc ) {
@@ -269,7 +265,7 @@ struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment)
269
265
/*.max_size = */ 0 ,
270
266
/*.measure = */ false,
271
267
/*.parse_seq = */ {0 },
272
- /*.has_parse_seq = */ false ,
268
+ /*.parse_seq_len = */ 0 ,
273
269
#ifdef GGML_ALLOCATOR_DEBUG
274
270
/*.allocated_tensors = */ = {0 },
275
271
#endif
@@ -298,7 +294,7 @@ struct ggml_allocr * ggml_allocr_new_measure(size_t alignment) {
298
294
/*.max_size = */ 0 ,
299
295
/*.measure = */ true,
300
296
/*.parse_seq = */ {0 },
301
- /*.has_parse_seq = */ false ,
297
+ /*.parse_seq_len = */ 0 ,
302
298
#ifdef GGML_ALLOCATOR_DEBUG
303
299
/*.allocated_tensors = */ = {0 },
304
300
#endif
@@ -445,8 +441,8 @@ static void allocate_node(struct ggml_allocr * alloc, struct ggml_tensor * node)
445
441
else {
446
442
AT_PRINTF ("reusing parent %s for %s\n" , parent -> name , node -> name );
447
443
node -> data = parent -> data ;
444
+ return ;
448
445
}
449
- return ;
450
446
}
451
447
}
452
448
}
@@ -497,69 +493,86 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
497
493
allocate_node (alloc , input );
498
494
}
499
495
}
500
- for (int ind = 0 ; ind < gf -> n_nodes ; ind ++ ) {
501
- int i ;
502
- if (alloc -> has_parse_seq ) {
503
- i = alloc -> parse_seq [ind ];
504
- } else {
505
- i = ind ;
506
- }
507
- struct ggml_tensor * node = gf -> nodes [i ];
508
-
509
- // allocate parents (leafs)
510
- for (int j = 0 ; j < GGML_MAX_SRC ; j ++ ) {
511
- struct ggml_tensor * parent = node -> src [j ];
512
- if (parent == NULL ) {
513
- break ;
496
+ // if we have parse_seq then we allocate nodes following the list, and we only free nodes at barriers
497
+ int last_barrier_pos = 0 ;
498
+ int n_nodes = alloc -> parse_seq_len ? alloc -> parse_seq_len : gf -> n_nodes ;
499
+
500
+ for (int ind = 0 ; ind < n_nodes ; ind ++ ) {
501
+ // allocate a node if there is no parse_seq or this is not a barrier
502
+ if ((alloc -> parse_seq_len == 0 ) || alloc -> parse_seq [ind ] != -1 ) {
503
+ int i = alloc -> parse_seq_len ? alloc -> parse_seq [ind ] : ind ;
504
+ struct ggml_tensor * node = gf -> nodes [i ];
505
+
506
+ // allocate parents (leafs)
507
+ for (int j = 0 ; j < GGML_MAX_SRC ; j ++ ) {
508
+ struct ggml_tensor * parent = node -> src [j ];
509
+ if (parent == NULL ) {
510
+ break ;
511
+ }
512
+ allocate_node (alloc , parent );
514
513
}
515
- allocate_node (alloc , parent );
516
- }
517
514
518
- // allocate node
519
- allocate_node (alloc , node );
515
+ // allocate node
516
+ allocate_node (alloc , node );
520
517
521
- AT_PRINTF ("exec: %s (%s) <= " , ggml_op_name (node -> op ), node -> name );
522
- for (int j = 0 ; j < GGML_MAX_SRC ; j ++ ) {
523
- struct ggml_tensor * parent = node -> src [j ];
524
- if (parent == NULL ) {
525
- break ;
526
- }
527
- AT_PRINTF ("%s" , parent -> name );
528
- if (j < GGML_MAX_SRC - 1 && node -> src [j + 1 ] != NULL ) {
529
- AT_PRINTF (", " );
518
+ AT_PRINTF ("exec: %s (%s) <= " , ggml_op_name (node -> op ), node -> name );
519
+ for (int j = 0 ; j < GGML_MAX_SRC ; j ++ ) {
520
+ struct ggml_tensor * parent = node -> src [j ];
521
+ if (parent == NULL ) {
522
+ break ;
523
+ }
524
+ AT_PRINTF ("%s" , parent -> name );
525
+ if (j < GGML_MAX_SRC - 1 && node -> src [j + 1 ] != NULL ) {
526
+ AT_PRINTF (", " );
527
+ }
530
528
}
529
+ AT_PRINTF ("\n" );
531
530
}
532
- AT_PRINTF ( "\n" );
531
+
533
532
534
533
// update parents
535
- for (int j = 0 ; j < GGML_MAX_SRC ; j ++ ) {
536
- struct ggml_tensor * parent = node -> src [j ];
537
- if (parent == NULL ) {
538
- break ;
539
- }
540
- struct hash_node * p_hn = hash_get (ht , parent );
541
- p_hn -> n_children -= 1 ;
542
-
543
- //AT_PRINTF("parent %s: %d children, %d views\n", parent->name, parent->n_children, parent->n_views);
544
-
545
- if (p_hn -> n_children == 0 && p_hn -> n_views == 0 ) {
546
- if (ggml_is_view (parent )) {
547
- struct ggml_tensor * view_src = get_view_source (parent );
548
- struct hash_node * view_src_hn = hash_get (ht , view_src );
549
- view_src_hn -> n_views -= 1 ;
550
- AT_PRINTF ("view_src %s\n" , view_src -> name );
551
- if (view_src_hn -> n_views == 0 && view_src_hn -> n_children == 0 && view_src -> data != node -> data ) {
552
- ggml_allocator_free_tensor (alloc , view_src );
534
+ // update immediately if there is no parse_seq
535
+ // update only at barriers if there is parse_seq
536
+ if ((alloc -> parse_seq_len == 0 ) || alloc -> parse_seq [ind ] == -1 ) {
537
+ int update_start = alloc -> parse_seq_len ? last_barrier_pos : ind ;
538
+ int update_end = alloc -> parse_seq_len ? ind : ind + 1 ;
539
+ for (int i = update_start ; i < update_end ; i ++ ) {
540
+ int node_i = alloc -> parse_seq_len ? alloc -> parse_seq [i ] : i ;
541
+ struct ggml_tensor * node = gf -> nodes [node_i ];
542
+
543
+ for (int j = 0 ; j < GGML_MAX_SRC ; j ++ ) {
544
+ struct ggml_tensor * parent = node -> src [j ];
545
+ if (parent == NULL ) {
546
+ break ;
553
547
}
554
- }
555
- else {
556
- if (parent -> data != node -> data ) {
557
- ggml_allocator_free_tensor (alloc , parent );
548
+ struct hash_node * p_hn = hash_get (ht , parent );
549
+ p_hn -> n_children -= 1 ;
550
+
551
+ //AT_PRINTF("parent %s: %d children, %d views\n", parent->name, parent->n_children, parent->n_views);
552
+
553
+ if (p_hn -> n_children == 0 && p_hn -> n_views == 0 ) {
554
+ if (ggml_is_view (parent )) {
555
+ struct ggml_tensor * view_src = get_view_source (parent );
556
+ struct hash_node * view_src_hn = hash_get (ht , view_src );
557
+ view_src_hn -> n_views -= 1 ;
558
+ AT_PRINTF ("view_src %s\n" , view_src -> name );
559
+ if (view_src_hn -> n_views == 0 && view_src_hn -> n_children == 0 && view_src -> data != node -> data ) {
560
+ ggml_allocator_free_tensor (alloc , view_src );
561
+ }
562
+ }
563
+ else {
564
+ if (parent -> data != node -> data ) {
565
+ ggml_allocator_free_tensor (alloc , parent );
566
+ }
567
+ }
558
568
}
559
569
}
560
570
}
571
+ AT_PRINTF ("\n" );
572
+ if (alloc -> parse_seq_len ) {
573
+ last_barrier_pos = ind + 1 ;
574
+ }
561
575
}
562
- AT_PRINTF ("\n" );
563
576
}
564
577
// free graph outputs here that wouldn't be freed otherwise because they have no children
565
578
if (outputs != NULL && outputs [g ] != NULL ) {
0 commit comments