Commit 57dd55e

server : fix kv cache management (#3588)
1 parent b8fe4b5 commit 57dd55e

1 file changed: +7 -3 lines changed

examples/server/server.cpp

Lines changed: 7 additions & 3 deletions
@@ -405,13 +405,17 @@ struct llama_server_context
         // compare the evaluated prompt with the new prompt
         n_past = common_part(embd, prompt_tokens);
         embd = prompt_tokens;
+
         if (n_past == num_prompt_tokens)
         {
             // we have to evaluate at least 1 token to generate logits.
             printf("we have to evaluate at least 1 token to generate logits\n");
             n_past--;
         }

+        // since #3228 we now have to manually manage the KV cache
+        llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
+
         LOG_VERBOSE("prompt ingested", {
                         {"n_past", n_past},
                         {"cached", tokens_to_str(ctx, embd.cbegin(), embd.cbegin() + n_past)},
@@ -461,16 +465,16 @@ struct llama_server_context
         // compare the evaluated prompt with the new prompt
         n_past = common_part(embd, prompt_tokens);

-        // since #3228 we now have to manually manage the KV cache
-        llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
-
         embd = prompt_tokens;
         if (n_past == num_prompt_tokens)
         {
             // we have to evaluate at least 1 token to generate logits.
             n_past--;
         }

+        // since #3228 we now have to manually manage the KV cache
+        llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
+
         LOG_VERBOSE("prompt ingested", {
                         {"n_past", n_past},
                         {"cached", tokens_to_str(ctx, embd.cbegin(), embd.cbegin() + n_past)},
