Skip to content

Commit 0c268a8

Browse files
committed
ggml-alloc: avoid returning silently
In certain cases, the allocate_node() function may silently return without performing any memory allocation.
1 parent ee8b2aa commit 0c268a8

File tree

2 files changed

+2
-7
lines changed

2 files changed

+2
-7
lines changed

ggml-alloc.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -441,8 +441,8 @@ static void allocate_node(struct ggml_allocr * alloc, struct ggml_tensor * node)
441441
else {
442442
AT_PRINTF("reusing parent %s for %s\n", parent->name, node->name);
443443
node->data = parent->data;
444+
return;
444445
}
445-
return;
446446
}
447447
}
448448
}
@@ -528,7 +528,7 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
528528
}
529529
AT_PRINTF("\n");
530530
}
531-
531+
532532

533533
// update parents
534534
// update immediately if there is no parse_seq

llama.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2704,11 +2704,6 @@ static struct ggml_cgraph * llm_build_falcon(
27042704
struct ggml_tensor * inpFF = attn_norm;
27052705

27062706
cur = ggml_mul_mat(ctx0, model.layers[il].w3, inpFF);
2707-
2708-
// TODO: this is temporary needed to introduce artificial dependency between FF and ATTN
2709-
// adding this, because there seems to be a bug in the Metal concurrency optimization
2710-
// without this line, the results are non-deterministic and wrong
2711-
cur->src[2] = attn_out;
27122707
offload_func(cur);
27132708

27142709
cur = ggml_gelu(ctx0, cur);

0 commit comments

Comments
 (0)