@@ -496,8 +496,6 @@ enum llm_tensor {
     LLM_TENSOR_ATTN_KV_B,
     LLM_TENSOR_ATTN_Q_A_NORM,
     LLM_TENSOR_ATTN_KV_A_NORM,
-    LLM_TENSOR_LAYER_NORM_1,
-    LLM_TENSOR_LAYER_NORM_2,
 };
 
 static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NAMES = {
@@ -719,8 +717,7 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NA
             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-            { LLM_TENSOR_LAYER_NORM_1, "blk.%d.layer_norm_1" },
-            { LLM_TENSOR_LAYER_NORM_2, "blk.%d.layer_norm_2" },
+            { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
         },
     },
     {
@@ -2014,12 +2011,6 @@ struct llama_layer {
     struct ggml_tensor * layer_out_norm_b;
     struct ggml_tensor * ffn_norm_exps;
 
-    // extra normalization layers needed by `jina-embeddings-v2-base-code`
-    struct ggml_tensor * layer_norm_1;
-    struct ggml_tensor * layer_norm_1_b;
-    struct ggml_tensor * layer_norm_2;
-    struct ggml_tensor * layer_norm_2_b;
-
     // ff
     struct ggml_tensor * ffn_gate; // w1
     struct ggml_tensor * ffn_down; // w2
@@ -4680,7 +4671,8 @@ static void llm_load_vocab(
                 tokenizer_pre == "jina-es" ||
                 tokenizer_pre == "jina-de" ||
                 tokenizer_pre == "jina-v2-es" ||
-                tokenizer_pre == "jina-v2-de") {
+                tokenizer_pre == "jina-v2-de" ||
+                tokenizer_pre == "jina-v2-code") {
             vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_GPT2;
         } else if (
             tokenizer_pre == "refact") {
@@ -5547,12 +5539,9 @@ static bool llm_load_tensors(
                     layer.attn_out_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT_NORM, "weight", i), {n_embd}); //output_norm
                     layer.attn_out_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT_NORM, "bias", i), {n_embd});
 
-                    layer.layer_norm_1 = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_LAYER_NORM_1 , "weight", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
-                    layer.layer_norm_1_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_LAYER_NORM_1 , "bias", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                    layer.attn_norm_2 = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM_2 , "weight", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                    layer.attn_norm_2_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM_2 , "bias", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
 
-                    layer.layer_norm_2 = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_LAYER_NORM_2, "weight", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
-                    layer.layer_norm_2_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_LAYER_NORM_2, "bias", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
-
                     layer.ffn_up = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff});
                     layer.ffn_gate = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff});
@@ -8516,12 +8505,8 @@ struct llm_build_context {
             // attention layer norm
             cur = llm_build_norm(ctx0, cur, hparams, model.layers[il].attn_out_norm, model.layers[il].attn_out_norm_b, LLM_NORM, cb, il);
 
-            if (model.layers[il].layer_norm_1 != nullptr) {
-                cur = llm_build_norm(ctx0, cur, hparams, model.layers[il].layer_norm_1, model.layers[il].layer_norm_1_b, LLM_NORM, cb, il);
-            }
-
-            if (model.layers[il].layer_norm_2 != nullptr) {
-                cur = llm_build_norm(ctx0, cur, hparams, model.layers[il].layer_norm_2, model.layers[il].layer_norm_2_b, LLM_NORM, cb, il);
+            if (model.layers[il].attn_norm_2 != nullptr) {
+                cur = llm_build_norm(ctx0, cur, hparams, model.layers[il].attn_norm_2, model.layers[il].attn_norm_2_b, LLM_NORM, cb, il);
             }
 
             struct ggml_tensor * ffn_inp = cur;