Skip to content

Commit 06ee7d3

Browse files
committed
graph : rework inputs to use only unique_ptr, remove attn input abstraction
ggml-ci
1 parent e1a502c commit 06ee7d3

File tree

4 files changed

+182
-192
lines changed

4 files changed

+182
-192
lines changed

src/llama-context.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1874,13 +1874,11 @@ llm_graph_result_ptr llama_context_kv_self::build_kv_self_shift(
 
     //GGML_ASSERT(kv_self->size == n_ctx);
 
-    auto inp = std::make_shared<llm_graph_input_k_shift>(kv_self.get());
+    auto inp = std::make_unique<llm_graph_input_k_shift>(kv_self.get());
 
     inp->k_shift = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, cparams.n_ctx);
     ggml_set_input(inp->k_shift);
 
-    res->add_input(inp);
-
     for (uint32_t il = 0; il < n_layer; ++il) {
         const int64_t n_head_kv = hparams.n_head_kv(il);
         const int64_t n_embd_k_gqa = hparams.n_embd_k_gqa(il);
@@ -1899,6 +1897,8 @@ llm_graph_result_ptr llama_context_kv_self::build_kv_self_shift(
         ggml_build_forward_expand(gf, cur);
     }
 
+    res->add_input(std::move(inp));
+
     return res;
 }

0 commit comments

Comments (0)