
Commit 0ea069b

server : fix prompt caching with system prompt (#4914)
1 parent: f172de0

1 file changed: 14 additions, 4 deletions

examples/server/server.cpp

@@ -1180,8 +1180,9 @@ struct llama_server_context
         return slot.images.size() > 0;
     }
 
-    void send_error(task_server& task, std::string error)
+    void send_error(task_server& task, const std::string &error)
     {
+        LOG_TEE("task %i - error: %s\n", task.id, error.c_str());
         std::unique_lock<std::mutex> lock(mutex_results);
         task_result res;
         res.id = task.id;
@@ -1570,12 +1571,22 @@ struct llama_server_context
                     LOG_TEE("slot unavailable\n");
                     // send error result
                     send_error(task, "slot unavailable");
-                    return;
+                    break;
                 }
 
                 if (task.data.contains("system_prompt"))
                 {
+                    if (!all_slots_are_idle) {
+                        send_error(task, "system prompt can only be updated when all slots are idle");
+                        break;
+                    }
                     process_system_prompt_data(task.data["system_prompt"]);
+
+                    // reset cache_tokens for all slots
+                    for (llama_client_slot &slot : slots)
+                    {
+                        slot.cache_tokens.clear();
+                    }
                 }
 
                 slot->reset();
@@ -1652,8 +1663,7 @@ struct llama_server_context
         // attend tasks
         process_tasks();
 
-        // update the system prompt wait until all slots are idle state
-        if (system_need_update && all_slots_are_idle)
+        if (system_need_update)
        {
            LOG_TEE("updating system prompt\n");
            update_system_prompt();
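
The core of the fix is the cache_tokens reset in the second hunk: the server reuses the longest common token prefix between a slot's cache_tokens and an incoming prompt to skip re-evaluating tokens it believes are already in the KV cache, so cache_tokens surviving a system prompt change would claim reuse of KV entries that were just rebuilt. Below is a minimal, self-contained sketch of that failure mode, not the server's actual code; common_prefix and the llama_token stand-in are invented for illustration.

#include <cstddef>
#include <cstdio>
#include <vector>

using llama_token = int; // hypothetical stand-in for the real typedef

// Hypothetical helper: number of leading tokens shared by the cached
// prompt and the new prompt, i.e. tokens the server would skip
// re-evaluating on the assumption that they are still in the KV cache.
static size_t common_prefix(const std::vector<llama_token> &a,
                            const std::vector<llama_token> &b) {
    size_t i = 0;
    while (i < a.size() && i < b.size() && a[i] == b[i]) {
        i++;
    }
    return i;
}

int main() {
    // Tokens evaluated under the old system prompt.
    std::vector<llama_token> cache_tokens = {1, 42, 42, 7, 9};
    // Request arriving after the system prompt was replaced; the KV
    // cache was rebuilt, so none of these tokens are actually cached.
    std::vector<llama_token> new_prompt = {1, 42, 42, 7, 11};

    // Without the fix: 4 tokens would be "reused" against a rebuilt cache.
    printf("reused without reset: %zu\n", common_prefix(cache_tokens, new_prompt));

    // With the fix: clearing cache_tokens forces a full re-evaluation.
    cache_tokens.clear();
    printf("reused after reset:   %zu\n", common_prefix(cache_tokens, new_prompt));
    return 0;
}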
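
The last hunk drops the all_slots_are_idle condition from the update loop. One plausible reading, sketched below with invented names (accept_system_prompt_task, update_loop_iteration), is that the idle check has moved to the point where the task is accepted, so system_need_update can only be set while every slot is idle and the loop-side re-check becomes redundant. This is a condensed, hypothetical control flow, not the server's actual code.

#include <cstdio>

static bool all_slots_are_idle = true;
static bool system_need_update = false;

static void accept_system_prompt_task() {
    if (!all_slots_are_idle) {
        // Rejected up front, mirroring the new send_error + break path.
        printf("error: system prompt can only be updated when all slots are idle\n");
        return;
    }
    system_need_update = true; // only ever set while all slots are idle
}

static void update_loop_iteration() {
    if (system_need_update) { // an all_slots_are_idle check here would be redundant
        printf("updating system prompt\n");
        system_need_update = false;
    }
}

int main() {
    all_slots_are_idle = false;
    accept_system_prompt_task(); // rejected
    update_loop_iteration();     // nothing to do

    all_slots_are_idle = true;
    accept_system_prompt_task(); // accepted
    update_loop_iteration();     // update runs
    return 0;
}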
