Skip to content

Commit a73ccf1

Browse files
authored
llama : replace (permute + reshape + view_1d) with (view_3d) (#2538)
ggml-ci
1 parent 7cf54e1 commit a73ccf1

File tree

1 file changed: 8 additions and 8 deletions.

llama.cpp

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1609,11 +1609,11 @@ static struct ggml_cgraph * llama_build_graph(
16091609
ggml_set_name(Q, "Q");
16101610

16111611
struct ggml_tensor * K =
1612-
ggml_permute(ctx0,
1613-
ggml_reshape_3d(ctx0,
1614-
ggml_view_1d(ctx0, kv_self.k, (n_past + N)*n_embd_gqa, il*n_ctx*ggml_element_size(kv_self.k)*n_embd_gqa),
1615-
n_embd_head, n_head_kv, n_past + N),
1616-
0, 2, 1, 3);
1612+
ggml_view_3d(ctx0, kv_self.k,
1613+
n_embd_head, n_past + N, n_head_kv,
1614+
ggml_element_size(kv_self.k)*n_embd_gqa,
1615+
ggml_element_size(kv_self.k)*n_embd_head,
1616+
ggml_element_size(kv_self.k)*n_embd_gqa*n_ctx*il);
16171617
offload_func_kq(K);
16181618
ggml_set_name(K, "K");
16191619

@@ -1642,9 +1642,9 @@ static struct ggml_cgraph * llama_build_graph(
16421642
struct ggml_tensor * V =
16431643
ggml_view_3d(ctx0, kv_self.v,
16441644
n_past + N, n_embd_head, n_head_kv,
1645-
n_ctx*ggml_element_size(kv_self.v),
1646-
n_ctx*ggml_element_size(kv_self.v)*n_embd_head,
1647-
n_ctx*ggml_element_size(kv_self.v)*n_embd_gqa*il);
1645+
ggml_element_size(kv_self.v)*n_ctx,
1646+
ggml_element_size(kv_self.v)*n_ctx*n_embd_head,
1647+
ggml_element_size(kv_self.v)*n_ctx*n_embd_gqa*il);
16481648
offload_func_v(V);
16491649
ggml_set_name(V, "V");
16501650

0 commit comments

Comments (0)