ggml-alloc: avoid returning silently

lshzh-ww · lshzh-ww · commit 0c268a83e866 · 2023-08-24T01:34:57.000-04:00
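In certain cases, the allocate_node() function may silently return
without performing any memory allocation. This change moves the `return`
into the `else` branch that reuses the parent's data, so the function no
longer returns unconditionally after that if/else.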
diff --git a/ggml-alloc.c b/ggml-alloc.c
@@ -441,8 +441,8 @@ static void allocate_node(struct ggml_allocr * alloc, struct ggml_tensor * node)
                         else {
                             AT_PRINTF("reusing parent %s for %s\n", parent->name, node->name);
                             node->data = parent->data;
+                            return;
                         }
-                        return;
                     }
                 }
             }
@@ -528,7 +528,7 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
                 }
                 AT_PRINTF("\n");
             }
-            
+
 
             // update parents
             // update immediately if there is no parse_seq
diff --git a/llama.cpp b/llama.cpp
@@ -2704,11 +2704,6 @@ static struct ggml_cgraph * llm_build_falcon(
             struct ggml_tensor * inpFF = attn_norm;
 
             cur = ggml_mul_mat(ctx0, model.layers[il].w3, inpFF);
-
-            // TODO: this is temporary needed to introduce artificial dependency between FF and ATTN
-            //       adding this, because there seems to be a bug in the Metal concurrency optimization
-            //       without this line, the results are non-deterministic and wrong
-            cur->src[2] = attn_out;
             offload_func(cur);
 
             cur = ggml_gelu(ctx0, cur);

Original file line number	Diff line number	Diff line change
`@@ -441,8 +441,8 @@ static void allocate_node(struct ggml_allocr * alloc, struct ggml_tensor * node)`
`441`	`441`	`else {`
`442`	`442`	`AT_PRINTF("reusing parent %s for %s\n", parent->name, node->name);`
`443`	`443`	`node->data = parent->data;`
	`444`	`+ return;`
`444`	`445`	`}`
`445`		`- return;`
`446`	`446`	`}`
`447`	`447`	`}`
`448`	`448`	`}`
`@@ -528,7 +528,7 @@ static size_t ggml_allocator_alloc_graph_tensors_n(`
`528`	`528`	`}`
`529`	`529`	`AT_PRINTF("\n");`
`530`	`530`	`}`
`531`		`-`
	`531`	`+`
`532`	`532`
`533`	`533`	`// update parents`
`534`	`534`	`// update immediately if there is no parse_seq`