
Commit 6997be9

MPT : support GQA for replit-code-v1.5 (ggml-org#3627)
(cherry picked from commit 11bff29)
1 parent c6ab72d

2 files changed: +4, -2 lines

convert-mpt-hf-to-gguf.py

Lines changed: 2 additions & 0 deletions
@@ -98,6 +98,8 @@ def parse_args() -> argparse.Namespace:
 gguf_writer.add_block_count(block_count)
 gguf_writer.add_feed_forward_length(4 * hparams["d_model"])
 gguf_writer.add_head_count(hparams["n_heads"])
+if kv_n_heads := hparams["attn_config"].get("kv_n_heads"):
+    gguf_writer.add_head_count_kv(kv_n_heads)
 gguf_writer.add_layer_norm_eps(1e-05)
 if hparams["attn_config"]["clip_qkv"] is not None:
     gguf_writer.add_clamp_kqv(hparams["attn_config"]["clip_qkv"])
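For readers unfamiliar with the new key: the converter only writes head_count_kv when the MPT config exposes attn_config.kv_n_heads, as replit-code-v1.5 does. A minimal sketch of that branch follows; the hparams values are illustrative placeholders, not copied from a real replit-code-v1.5 config.json.

# Sketch of the GQA metadata branch added above; the hparams values are
# illustrative placeholders, not copied from a real config file.
hparams = {
    "d_model": 3072,
    "n_heads": 24,
    "attn_config": {"kv_n_heads": 8, "clip_qkv": None},
}

head_count = hparams["n_heads"]
head_count_kv = head_count  # default: plain multi-head attention
# The walrus expression is falsy when "kv_n_heads" is absent (or zero), so
# older MPT checkpoints without the key keep the MHA default above.
if kv_n_heads := hparams["attn_config"].get("kv_n_heads"):
    head_count_kv = kv_n_heads

print(head_count, head_count_kv)  # 24 8

When kv_n_heads is absent, the key is simply not written to the GGUF file, and the loader is expected to fall back to head_count_kv == head_count.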

llama.cpp

Lines changed: 2 additions & 2 deletions
@@ -2383,8 +2383,8 @@ static void llm_load_tensors(
     auto & layer = model.layers[i];
 
     layer.attn_norm = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, backend);
-    layer.wqkv = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, 3*n_embd}, backend_split);
-    layer.wo = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, backend_split);
+    layer.wqkv = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, n_embd + 2*n_embd_gqa}, backend_split);
+    layer.wo = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, backend_split);
 
     layer.ffn_norm = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, backend);
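The wqkv shape change encodes the GQA layout: Q keeps the full model width n_embd, while K and V each shrink to n_embd_gqa, the per-head width times the number of KV heads. A rough sketch of the arithmetic, with illustrative dimensions rather than values read from an actual GGUF:

# Rough sketch of the fused QKV width arithmetic; the dimensions below are
# illustrative assumptions, not values read from a real replit-code-v1.5 GGUF.
n_embd    = 3072   # model width
n_head    = 24     # query heads
n_head_kv = 8      # key/value heads (GQA)

n_embd_head = n_embd // n_head          # width of a single head: 128
n_embd_gqa  = n_embd_head * n_head_kv   # combined K (or V) width: 1024

old_qkv_width = 3 * n_embd               # 9216, assumed an MHA-only layout
new_qkv_width = n_embd + 2 * n_embd_gqa  # 5120, Q at full width + smaller K, V

print(old_qkv_width, new_qkv_width)

When n_head_kv == n_head, n_embd_gqa equals n_embd and the two expressions coincide, so the new shape also covers ordinary multi-head models.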
