@@ -18620,7 +18620,7 @@ struct llm_build_cogvlm : public llm_graph_context {
1862018620
1862118621 ggml_tensor * inp_pos = build_inp_pos();
1862218622
18623- auto * inp_attn = build_attn_inp_kv_unified ();
18623+ auto * inp_attn = build_attn_inp_kv ();
1862418624
1862518625 // check ubatch to see if we have input tokens (text)
1862618626 // or an input embedding vector (image)
@@ -18662,15 +18662,13 @@ struct llm_build_cogvlm : public llm_graph_context {
1866218662 qkv->nb[1], 0);
1866318663 ggml_tensor * Kcur = ggml_view_3d(ctx0, qkv, n_embd_head, n_head_kv, n_tokens, n_embd_head * sizeof(float),
1866418664 qkv->nb[1], n_embd * ggml_element_size(qkv));
18665- ggml_tensor * Vcur = ggml_cont(ctx0, ggml_view_2d(ctx0, qkv, n_embd, n_tokens,
18666- qkv->nb[1], 2 * n_embd * ggml_element_size(qkv)));
18667-
18668- Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens);
18665+ ggml_tensor * Vcur = ggml_view_3d(ctx0, qkv, n_embd_head, n_head_kv, n_tokens, n_embd_head * sizeof(float),
18666+ qkv->nb[1], 2 * n_embd * ggml_element_size(qkv));
1866918667
1867018668 Qcur = ggml_rope(ctx0, Qcur, inp_pos, n_embd_head, rope_type);
1867118669 Kcur = ggml_rope(ctx0, Kcur, inp_pos, n_embd_head, rope_type);
1867218670
18673- cur = build_attn(inp_attn, wo, nullptr, Qcur, Kcur, Vcur, nullptr, nullptr, kq_scale, il);
18671+ cur = build_attn(inp_attn, wo, nullptr, Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, kq_scale, il);
1867418672 cb(cur, "attn_out", il);
1867518673 }
1867618674
0 commit comments