@@ -1609,11 +1609,11 @@ static struct ggml_cgraph * llama_build_graph(
1609
1609
ggml_set_name (Q, " Q" );
1610
1610
1611
1611
struct ggml_tensor * K =
1612
- ggml_permute (ctx0,
1613
- ggml_reshape_3d (ctx0 ,
1614
- ggml_view_1d (ctx0, kv_self. k , (n_past + N)*n_embd_gqa, il*n_ctx* ggml_element_size (kv_self.k )*n_embd_gqa) ,
1615
- n_embd_head, n_head_kv, n_past + N) ,
1616
- 0 , 2 , 1 , 3 );
1612
+ ggml_view_3d (ctx0, kv_self. k ,
1613
+ n_embd_head, n_past + N, n_head_kv ,
1614
+ ggml_element_size (kv_self.k )*n_embd_gqa,
1615
+ ggml_element_size (kv_self. k )*n_embd_head ,
1616
+ ggml_element_size (kv_self. k )*n_embd_gqa*n_ctx*il );
1617
1617
offload_func_kq (K);
1618
1618
ggml_set_name (K, " K" );
1619
1619
@@ -1642,9 +1642,9 @@ static struct ggml_cgraph * llama_build_graph(
1642
1642
struct ggml_tensor * V =
1643
1643
ggml_view_3d (ctx0, kv_self.v ,
1644
1644
n_past + N, n_embd_head, n_head_kv,
1645
- n_ctx* ggml_element_size (kv_self.v ),
1646
- n_ctx* ggml_element_size (kv_self.v )*n_embd_head,
1647
- n_ctx* ggml_element_size (kv_self.v )*n_embd_gqa*il);
1645
+ ggml_element_size (kv_self.v )*n_ctx ,
1646
+ ggml_element_size (kv_self.v )*n_ctx *n_embd_head,
1647
+ ggml_element_size (kv_self.v )*n_ctx *n_embd_gqa*il);
1648
1648
offload_func_v (V);
1649
1649
ggml_set_name (V, " V" );
1650
1650
0 commit comments