@@ -221,6 +221,7 @@ enum llm_arch {
     LLM_ARCH_COMMAND_R,
     LLM_ARCH_DBRX,
     LLM_ARCH_OLMO,
+    LLM_ARCH_GRANITE_SMALL,
     LLM_ARCH_UNKNOWN,
 };
 
@@ -257,6 +258,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_COMMAND_R, "command-r" },
     { LLM_ARCH_DBRX, "dbrx" },
     { LLM_ARCH_OLMO, "olmo" },
+    { LLM_ARCH_GRANITE_SMALL, "granite-small" },
     { LLM_ARCH_UNKNOWN, "(unknown)" },
 };
 
@@ -1032,6 +1034,32 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NA
             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
         },
     },
+    {
+        LLM_ARCH_GRANITE_SMALL,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+            { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
+            { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
+            { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
+            { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+        },
+    },
     {
         LLM_ARCH_UNKNOWN,
         {
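
Note: the map above only registers name templates. At load time each "%d" is filled in with the block index and a suffix such as "weight" is appended, which is the pattern visible in the tn(LLM_TENSOR_TOKEN_EMBD, "weight") call further down in this diff. A minimal standalone sketch of that expansion (my own illustration, not llama.cpp's tn() helper):

#include <cstdio>
#include <string>

// Illustrative only: expand a per-layer template such as "blk.%d.attn_q"
// into the full tensor name stored in the GGUF file, e.g. "blk.0.attn_q.weight".
static std::string format_tensor_name(const char * templ, int layer, const char * suffix) {
    char buf[256];
    std::snprintf(buf, sizeof(buf), templ, layer); // fill in the %d block index
    return std::string(buf) + "." + suffix;        // append "weight" / "bias"
}

int main() {
    std::printf("%s\n", format_tensor_name("blk.%d.attn_q", 0, "weight").c_str()); // blk.0.attn_q.weight
}
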
@@ -4344,6 +4372,16 @@ static void llm_load_hparams(
                     default: model.type = e_model::MODEL_UNKNOWN;
                 }
             } break;
+        case LLM_ARCH_GRANITE_SMALL:
+            {
+                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
+
+                switch (hparams.n_layer) {
+                    case 32: model.type = e_model::MODEL_3B; break;
+                    case 36: model.type = e_model::MODEL_8B; break;
+                    default: model.type = e_model::MODEL_UNKNOWN;
+                }
+            } break;
         default: (void)0;
     }
 
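
The only Granite-specific hyperparameter handling is reading the RMS-norm epsilon and deriving a reported size label from the layer count. A standalone sketch of that mapping, mirroring the switch added above (illustrative only; granite_small_size_label is a hypothetical helper, not llama.cpp code):

#include <cstdio>

// 32 layers -> the 3B variant, 36 layers -> the 8B variant, anything else unknown.
static const char * granite_small_size_label(int n_layer) {
    switch (n_layer) {
        case 32: return "3B";
        case 36: return "8B";
        default: return "unknown";
    }
}

int main() {
    std::printf("32 layers -> %s, 36 layers -> %s\n",
                granite_small_size_label(32), granite_small_size_label(36));
}
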
@@ -4453,6 +4491,9 @@ static void llm_load_vocab(
     } else {
         if (tokenizer_model == "gpt2") {
             vocab.type = LLAMA_VOCAB_TYPE_BPE;
+            if (model.arch == LLM_ARCH_GRANITE_SMALL) {
+                vocab.add_space_prefix = false;
+            }
         } else {
             LLAMA_LOG_WARN("%s: unknown tokenizer: '%s'", __func__, tokenizer_model.c_str());
             LLAMA_LOG_WARN("%s: using default tokenizer: 'llama'", __func__);
@@ -5023,6 +5064,7 @@ static bool llm_load_tensors(
             case LLM_ARCH_LLAMA:
             case LLM_ARCH_REFACT:
             case LLM_ARCH_MINICPM:
+            case LLM_ARCH_GRANITE_SMALL:
                 {
                     model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab});
 
@@ -10893,6 +10935,7 @@ static struct ggml_cgraph * llama_build_graph(
 
     switch (model.arch) {
         case LLM_ARCH_LLAMA:
+        case LLM_ARCH_GRANITE_SMALL:
             {
                 result = llm.build_llama();
             } break;
@@ -16038,6 +16081,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
         case LLM_ARCH_GEMMA:
         case LLM_ARCH_STARCODER2:
         case LLM_ARCH_GPTNEOX:
+        case LLM_ARCH_GRANITE_SMALL:
             return LLAMA_ROPE_TYPE_NEOX;
 
         // all model arches should be listed explicitly here
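
Routing granite-small to LLAMA_ROPE_TYPE_NEOX selects the GPT-NeoX rotary layout. To the best of my understanding, "normal" RoPE rotates adjacent dimension pairs (2i, 2i+1), while the NEOX variant pairs each dimension i with i + n_rot/2; the sketch below only illustrates that index pairing and is not code from llama.cpp:

#include <cstdio>
#include <utility>

// Which two dimensions form a rotation pair under each layout (illustrative only).
static std::pair<int, int> rope_pair_norm(int i)            { return { 2 * i, 2 * i + 1 }; }
static std::pair<int, int> rope_pair_neox(int i, int n_rot) { return { i, i + n_rot / 2 }; }

int main() {
    const int n_rot = 128;
    auto a = rope_pair_norm(0);        // (0, 1)
    auto b = rope_pair_neox(0, n_rot); // (0, 64)
    std::printf("norm: (%d,%d)  neox: (%d,%d)\n", a.first, a.second, b.first, b.second);
}
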