@@ -1609,11 +1609,11 @@ static struct ggml_cgraph * llama_build_graph(
16091609 ggml_set_name (Q, " Q" );
16101610
16111611 struct ggml_tensor * K =
1612- ggml_permute (ctx0,
1613- ggml_reshape_3d (ctx0 ,
1614- ggml_view_1d (ctx0, kv_self. k , (n_past + N)*n_embd_gqa, il*n_ctx* ggml_element_size (kv_self.k )*n_embd_gqa) ,
1615- n_embd_head, n_head_kv, n_past + N) ,
1616- 0 , 2 , 1 , 3 );
1612+ ggml_view_3d (ctx0, kv_self. k ,
1613+ n_embd_head, n_past + N, n_head_kv ,
1614+ ggml_element_size (kv_self.k )*n_embd_gqa,
1615+ ggml_element_size (kv_self. k )*n_embd_head ,
1616+ ggml_element_size (kv_self. k )*n_embd_gqa*n_ctx*il );
16171617 offload_func_kq (K);
16181618 ggml_set_name (K, " K" );
16191619
@@ -1642,9 +1642,9 @@ static struct ggml_cgraph * llama_build_graph(
             struct ggml_tensor * V =
                 ggml_view_3d(ctx0, kv_self.v,
                         n_past + N, n_embd_head, n_head_kv,
-                        n_ctx*ggml_element_size(kv_self.v),
-                        n_ctx*ggml_element_size(kv_self.v)*n_embd_head,
-                        n_ctx*ggml_element_size(kv_self.v)*n_embd_gqa*il);
+                        ggml_element_size(kv_self.v)*n_ctx,
+                        ggml_element_size(kv_self.v)*n_ctx*n_embd_head,
+                        ggml_element_size(kv_self.v)*n_ctx*n_embd_gqa*il);
             offload_func_v(V);
             ggml_set_name(V, "V");

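The K hunk replaces the three-node `ggml_view_1d` → `ggml_reshape_3d` → `ggml_permute` chain with a single `ggml_view_3d` that encodes the same layout directly as byte strides, while the V hunk only reorders the stride factors so that `ggml_element_size(...)` comes first, matching the K call. Below is a minimal standalone sketch (plain C, not llama.cpp code) that checks the byte offsets addressed by the new K strides against the layout the old chain exposed; every size in it (element size, head dimension, KV head count, context length, layer index, `n_past + N`) is an illustrative assumption, not a value taken from this commit.

// Standalone sketch: verify that the ggml_view_3d strides used for K address
// the same bytes as the old view_1d + reshape_3d + permute chain.
// All sizes below are assumed for illustration only.
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

int main(void) {
    const size_t esz         = 2;    // assumed element size of kv_self.k (e.g. F16)
    const size_t n_embd_head = 128;  // assumed head dimension
    const size_t n_head_kv   = 8;    // assumed number of KV heads
    const size_t n_embd_gqa  = n_embd_head * n_head_kv;
    const size_t n_ctx       = 512;  // assumed cache capacity in tokens
    const size_t il          = 3;    // assumed layer index
    const size_t n_kv        = 40;   // assumed n_past + N

    // New path: one ggml_view_3d over kv_self.k with explicit byte strides,
    // ne = (n_embd_head, n_past + N, n_head_kv).
    const size_t nb1    = esz * n_embd_gqa;              // step to the next token position
    const size_t nb2    = esz * n_embd_head;             // step to the next KV head
    const size_t offset = esz * n_embd_gqa * n_ctx * il; // start of this layer's K slab

    for (size_t h = 0; h < n_head_kv; ++h) {
        for (size_t p = 0; p < n_kv; ++p) {
            for (size_t d = 0; d < n_embd_head; ++d) {
                // Address under the new view: offset + d*nb0 + p*nb1 + h*nb2.
                const size_t addr_view3d = offset + d*esz + p*nb1 + h*nb2;

                // Address under the old chain: the 1-D view starts at the same
                // per-layer byte offset, the reshape keeps the contiguous
                // [position][head][dim] layout, and the permute only relabels
                // axes without moving data.
                const size_t addr_old = esz * (il*n_ctx*n_embd_gqa + p*n_embd_gqa + h*n_embd_head + d);

                assert(addr_view3d == addr_old);
            }
        }
    }
    printf("view_3d strides match the old reshape/permute layout\n");
    return 0;
}

Read this way, the K change leaves the cache data and the values read from it untouched; it just drops the intermediate reshape and permute tensors from the graph.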