
Commit bb63809

fix: Add missing padding to n_ctx for hybrid cache construction
Branch: GraniteFour
Signed-off-by: Gabe Goodhart <[email protected]>
1 parent 1594fa9

File tree

1 file changed (+5, -1)

src/llama-model.cpp

Lines changed: 5 additions & 1 deletion
@@ -13236,13 +13236,17 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
                     std::max((uint32_t) 1, cparams.n_seq_max),
                     cparams.n_seq_max);
         } else if (llm_arch_is_hybrid_recurrent(arch)) {
+            const auto padding = llama_kv_cache_unified::get_padding(cparams);
+
+            cparams.n_ctx = GGML_PAD(cparams.n_ctx, padding);
+
             res = new llama_kv_cache_hybrid_recurrent(
                 /* model             */ *this,
                 /* attn_type_k       */ params.type_k,
                 /* attn_type_v       */ params.type_v,
                 /* attn_v_trans      */ !cparams.flash_attn,
                 /* attn_kv_size      */ cparams.n_ctx,
-                /* attn_n_pad        */ llama_kv_cache_unified::get_padding(cparams),
+                /* attn_n_pad        */ padding,
                 /* attn_n_swa        */ hparams.n_swa,
                 /* attn_swa_type     */ hparams.swa_type,
                 /* recurrent_type_k  */ GGML_TYPE_F32,
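The change hoists the llama_kv_cache_unified::get_padding(cparams) call into a local, rounds cparams.n_ctx up to a multiple of that padding with GGML_PAD before the hybrid cache is constructed, and reuses the same local for the attn_n_pad argument. As a minimal standalone sketch of the rounding step, assuming GGML_PAD is the usual round-up macro from ggml.h (bitmask form, so the padding must be a power of two) and using a hypothetical padding value of 256:

    #include <cstdint>
    #include <cstdio>

    // Round x up to the next multiple of n; n must be a power of two.
    // Assumed to match the GGML_PAD definition in ggml.h.
    #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))

    int main() {
        const uint32_t padding = 256;  // hypothetical KV-cache padding
        uint32_t n_ctx = 1000;         // requested context, not a multiple of 256

        // Mirrors the fix: pad n_ctx before it is handed to the hybrid cache
        // as attn_kv_size, so the size is always a multiple of attn_n_pad.
        n_ctx = GGML_PAD(n_ctx, padding);

        printf("padded n_ctx = %u\n", n_ctx);  // prints 1024
        return 0;
    }

Without this rounding, a requested n_ctx could reach the unified attention cache inside the hybrid memory as a size that is not a multiple of its padding, which appears to be the "missing padding" the commit message refers to.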
