From f034effa22726121ac13810d6edf62526019c960 Mon Sep 17 00:00:00 2001
From: ydlme
Date: Fri, 10 Nov 2023 22:53:25 +0800
Subject: [PATCH] server: fix core dump when input prompt larger than prompt
 context (n_ctx)

---
 examples/server/server.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index cbf36ad6752b6..5139b7356a038 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1560,6 +1560,13 @@ struct llama_server_context
                 if (!slot.params.cache_prompt)
                 {
                     llama_sampling_reset(slot.ctx_sampling);
+                    // if input prompt is too big, truncate it
+                    if (slot.num_prompt_tokens >= slot.n_ctx)
+                    {
+                        slot.num_prompt_tokens = slot.n_ctx - 1;
+                        prompt_tokens = std::vector<llama_token>(prompt_tokens.end() - slot.num_prompt_tokens, prompt_tokens.end());
+                        slot.truncated = true;
+                    }
 
                     slot.n_past = 0;
                     slot.num_prompt_tokens_processed = slot.num_prompt_tokens;
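
For context: the crash happens because a prompt of slot.num_prompt_tokens tokens is pushed into a context that only holds slot.n_ctx of them, so the patch keeps just the last n_ctx - 1 tokens and flags the slot as truncated. Below is a minimal standalone sketch of that keep-the-tail truncation; the truncate_prompt helper and the use of a plain int as llama_token are illustrative stand-ins, not part of the patch.

    #include <cstdio>
    #include <vector>

    // Illustrative stand-in for llama.cpp's token type (an integer id).
    using llama_token = int;

    // Keep only the last n_ctx - 1 tokens so one slot of the context window
    // stays free for generation; mirrors the logic added by the patch above.
    // Returns true when the prompt had to be truncated.
    static bool truncate_prompt(std::vector<llama_token> & prompt_tokens, const int n_ctx) {
        if ((int) prompt_tokens.size() < n_ctx) {
            return false; // prompt already fits
        }
        const int num_prompt_tokens = n_ctx - 1;
        prompt_tokens = std::vector<llama_token>(prompt_tokens.end() - num_prompt_tokens,
                                                 prompt_tokens.end());
        return true; // the real server records this in slot.truncated
    }

    int main() {
        std::vector<llama_token> prompt = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};

        const bool truncated = truncate_prompt(prompt, /*n_ctx=*/4);

        // Prints: truncated=1 tokens: 7 8 9
        printf("truncated=%d tokens:", truncated ? 1 : 0);
        for (const llama_token t : prompt) {
            printf(" %d", t);
        }
        printf("\n");
        return 0;
    }

Keeping the tail of the prompt rather than the head preserves the most recent input, and trimming to n_ctx - 1 rather than n_ctx appears intended to leave room for at least one generated token.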