Commit b974e9f

llama: add support for small granite models
It works only for the small models (3B and 8B). The convert-hf-to-gguf.py script uses the vocabulary size of the Granite models to detect Granite and set the correct configuration. Signed-off-by: Giuseppe Scrivano <[email protected]>
1 parent 06748ff
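
The detection relies on nothing but the vocabulary size. A minimal sketch of the heuristic, assuming a plain hparams dict loaded from the model's config.json; the helper name is hypothetical, while the 32000 default and the 49152 check come from the diff below:

    # Hypothetical helper illustrating the detection heuristic in this commit:
    # small Granite checkpoints ship a 49152-entry vocabulary, while other
    # llama-family models default to 32000, so vocab_size alone tells them apart.
    def looks_like_granite_small(hparams: dict) -> bool:
        return hparams.get("vocab_size", 32000) == 49152

    print(looks_like_granite_small({"vocab_size": 49152}))  # True  (small Granite)
    print(looks_like_granite_small({"vocab_size": 32000}))  # False (vanilla Llama)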

File tree

2 files changed: +11 −3 lines

convert-hf-to-gguf.py

Lines changed: 6 additions & 2 deletions
@@ -1322,6 +1322,10 @@ def set_gguf_parameters(self):
                 if "add_prefix_space" in tokenizer_config_json:
                     self.gguf_writer.add_add_space_prefix(tokenizer_config_json["add_prefix_space"])
 
+        # Apply to granite small models only
+        if self.hparams.get("vocab_size", 32000) == 49152:
+            self.gguf_writer.add_add_bos_token(False)
+
     @staticmethod
     def permute(weights: Tensor, n_head: int, n_head_kv: int | None):
         if n_head_kv is not None and n_head != n_head_kv:
@@ -1336,9 +1340,9 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         n_head = self.hparams["num_attention_heads"]
         n_kv_head = self.hparams.get("num_key_value_heads")
 
-        if name.endswith("q_proj.weight"):
+        if name.endswith(("q_proj.weight", "q_proj.bias")):
             data_torch = LlamaModel.permute(data_torch, n_head, n_head)
-        if name.endswith("k_proj.weight"):
+        if name.endswith(("k_proj.weight", "k_proj.bias")):
             data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head)
 
         # process the experts separately

llama.cpp

Lines changed: 5 additions & 1 deletion
@@ -3982,7 +3982,9 @@ static void llm_load_hparams(
                 switch (hparams.n_layer) {
                     case 22: model.type = e_model::MODEL_1B; break;
                     case 26: model.type = e_model::MODEL_3B; break;
-                    case 32: model.type = hparams.n_vocab < 40000 ? e_model::MODEL_7B : e_model::MODEL_8B; break;
+                    // granite uses a vocab with len 49152
+                    case 32: model.type = hparams.n_vocab == 49152 ? e_model::MODEL_3B : (hparams.n_vocab < 40000 ? e_model::MODEL_7B : e_model::MODEL_8B); break;
+                    case 36: model.type = e_model::MODEL_8B; break; // granite
                     case 40: model.type = e_model::MODEL_13B; break;
                     case 48: model.type = e_model::MODEL_34B; break;
                     case 60: model.type = e_model::MODEL_30B; break;
@@ -4252,6 +4254,8 @@ static void llm_load_hparams(
                     case 30: model.type = e_model::MODEL_3B; break;
                     case 32: model.type = e_model::MODEL_7B; break;
                     case 40: model.type = e_model::MODEL_15B; break;
+                    case 52: model.type = e_model::MODEL_20B; break; // granite
+                    case 88: model.type = e_model::MODEL_34B; break; // granite
                     default: model.type = e_model::MODEL_UNKNOWN;
                 }
             } break;
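
To make the 32-layer disambiguation concrete: Granite 3B, Llama 7B, and Llama 3 8B all have 32 layers, so the updated case falls back to vocabulary size. A rough Python rendering of that ternary (function name and string labels are illustrative, not llama.cpp API):

    def classify_32_layer_llama(n_vocab: int) -> str:
        # Mirrors the updated `case 32` above: Granite's 49152-entry vocab marks
        # the 3B model; otherwise the existing vocab-size split between 7B and
        # 8B still applies.
        if n_vocab == 49152:
            return "MODEL_3B"   # granite
        return "MODEL_7B" if n_vocab < 40000 else "MODEL_8B"

    print(classify_32_layer_llama(49152))   # MODEL_3B (granite)
    print(classify_32_layer_llama(32000))   # MODEL_7B
    print(classify_32_layer_llama(128256))  # MODEL_8B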
