
Commit 051e1b0

llama : fix kv_cache n init (close #1903)

Parent commit: 86c7571

3 files changed: 4 additions & 0 deletions

.gitignore
Lines changed: 1 addition & 0 deletions

@@ -34,6 +34,7 @@ models/*
 /perplexity
 /embedding
 /train-text-from-scratch
+/simple
 /benchmark-matmult
 /vdot
 /server

examples/CMakeLists.txt
Lines changed: 1 addition & 0 deletions

@@ -38,6 +38,7 @@ else()
     add_subdirectory(benchmark)
     add_subdirectory(baby-llama)
     add_subdirectory(train-text-from-scratch)
+    add_subdirectory(simple)
     if (LLAMA_METAL)
         add_subdirectory(metal)
     endif()
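
Note: the hunk above only registers the simple example with the build; the new subdirectory has to supply its own CMakeLists.txt. A minimal sketch of that file, assuming it follows the same target pattern as the sibling example directories (the source file name and linked libraries are assumptions, not taken from this commit):

# Hypothetical examples/simple/CMakeLists.txt (sketch, not part of this diff)
set(TARGET simple)
add_executable(${TARGET} simple.cpp)  # assumed source file name
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})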

llama.cpp
Lines changed: 2 additions & 0 deletions

@@ -886,6 +886,7 @@ static bool kv_cache_init(
     const int64_t n_elements = n_embd*n_mem;

     cache.buf.resize(2u*n_elements*ggml_type_size(wtype) + 2u*MB);
+    cache.n = 0;

     struct ggml_init_params params;
     params.mem_size = cache.buf.size;
@@ -904,6 +905,7 @@ static bool kv_cache_init(
     ggml_set_name(cache.k, "cache_k");
     ggml_set_name(cache.v, "cache_v");

+    (void) n_gpu_layers;
 #ifdef GGML_USE_CUBLAS
     if (n_gpu_layers > n_layer + 1) {
         ggml_cuda_assign_buffers_no_scratch(cache.v);
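
The substantive fix is the cache.n = 0; line: the KV cache tracks how many tokens it currently holds in a plain int member with no default initializer, so before this commit a freshly created cache could report an indeterminate token count until the first eval overwrote it. The second addition, (void) n_gpu_layers;, silences the unused-parameter warning in builds where GGML_USE_CUBLAS is not defined and the #ifdef block that reads the parameter compiles away. A minimal sketch of the failure mode and the fix, with abbreviated types (only kv_cache_init and the n member are taken from llama.cpp; the rest is illustrative):

#include <cstdio>

// Abbreviated sketch of the relevant state; the real struct also holds
// the ggml tensors and backing buffer for the K and V data.
struct llama_kv_cache_sketch {
    int n;  // number of tokens currently stored; no default initializer
};

static bool kv_cache_init_sketch(llama_kv_cache_sketch & cache) {
    // ... allocate buffers, create the cache.k / cache.v tensors ...
    cache.n = 0;  // the fix: without this, n holds indeterminate garbage
                  // and any logic sized off the cached token count misbehaves
    return true;
}

int main() {
    llama_kv_cache_sketch cache;
    kv_cache_init_sketch(cache);
    std::printf("tokens in cache after init: %d\n", cache.n);  // prints 0
    return 0;
}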
