From 23c5b57ae3963208ac9e0575c30dd17cdea4e960 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Thu, 5 Jun 2025 13:45:34 +0200
Subject: [PATCH] llama : default pooling last for qwen3

---
 src/llama-model.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index afef8487030fb..1559e4a6bbeb9 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -820,6 +820,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
         case LLM_ARCH_QWEN3:
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
+                hparams.pooling_type = LLAMA_POOLING_TYPE_LAST; // for embeddings model
+
                 switch (hparams.n_layer) {
                     case 28: type = hparams.n_embd == 1024 ? LLM_TYPE_0_6B : LLM_TYPE_1_7B; break;
                     case 36: type = hparams.n_embd == 2560 ? LLM_TYPE_4B : LLM_TYPE_8B; break;
@@ -830,8 +832,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
             } break;
         case LLM_ARCH_QWEN3MOE:
            {
-                ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp, false);
-
+                ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp, false);
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                 switch (hparams.n_layer) {
                     case 48: type = LLM_TYPE_30B_A3B; break;