
Commit 57e2a7a

cmp-nct and ggerganov authored
llama : fix falcon arch for tied output embeddings (#4978)
* falcon arch fix for tied output embeddings
* Update llama.cpp
  Co-authored-by: Georgi Gerganov <[email protected]>
* Update llama.cpp
* Update llama.cpp
  Co-authored-by: Georgi Gerganov <[email protected]>
* Update llama.cpp
---------
Co-authored-by: Georgi Gerganov <[email protected]>
1 parent 9b6ea42 commit 57e2a7a

File tree: 1 file changed (+6, -1 lines)


llama.cpp

Lines changed: 6 additions & 1 deletion
@@ -3438,7 +3438,12 @@ static bool llm_load_tensors(
                     {
                         model.output_norm   = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd});
                         model.output_norm_b = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "bias"),   {n_embd});
-                        model.output        = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab});
+                        if (gguf_find_tensor(ml.ctx_gguf, tn(LLM_TENSOR_OUTPUT, "weight").c_str()) >= 0) {
+                            model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab});
+                        } else {
+                            model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}); // needs to be on GPU
+                            ml.n_created--; // artificial tensor
+                        }
                     }

                     for (int i = 0; i < n_layer; ++i) {
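
For context, the change implements a standard weight-tying fallback: when the GGUF file carries no dedicated output-projection tensor, the token-embedding matrix is reused as the output head, and the loader's created-tensor counter is decremented so the later check that every file tensor was consumed exactly once still passes. Below is a minimal, self-contained sketch of that lookup-with-fallback pattern; the table type, tensor names, and create_tensor helper are simplified stand-ins for illustration, not the llama.cpp or GGUF API.

    #include <iostream>
    #include <string>
    #include <unordered_map>

    // Stand-in for a model file's tensor table (hypothetical; not the GGUF API).
    using TensorTable = std::unordered_map<std::string, const float *>;

    // Mimics ml.create_tensor: fetch a tensor by name and bump the counter that
    // is later compared against the number of tensors stored in the file.
    static const float * create_tensor(const TensorTable & table, const std::string & name, int & n_created) {
        auto it = table.find(name);
        n_created++;
        return it != table.end() ? it->second : nullptr;
    }

    int main() {
        const float weights[4] = {0.1f, 0.2f, 0.3f, 0.4f};
        // A tied-embeddings model: the file has no separate "output.weight".
        TensorTable table = {{"token_embd.weight", weights}};

        int n_created = 0;
        const float * tok_embd = create_tensor(table, "token_embd.weight", n_created);
        const float * output   = nullptr;

        if (table.count("output.weight") > 0) {  // analogous to gguf_find_tensor(...) >= 0
            output = create_tensor(table, "output.weight", n_created);
        } else {
            output = create_tensor(table, "token_embd.weight", n_created); // reuse the tied weights
            n_created--;  // artificial tensor: the file tensor must not be counted twice
        }

        // The single file tensor backs both roles and is counted exactly once.
        std::cout << (output == tok_embd) << " " << n_created << "\n"; // prints "1 1"
    }

This mirrors why the commit decrements ml.n_created: the fallback create_tensor call counts a tensor that already exists in the file under another name, and without the decrement the loader's consistency check would fail.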
