Skip to content

Commit 821f0a2

Browse files
ngxsonXuan Son Nguyen
and
Xuan Son Nguyen
authored
server : defer tasks when "slot unavailable" (#5018)
* server: defer task when no slot is available * remove unnecessary log --------- Co-authored-by: Xuan Son Nguyen <[email protected]>
1 parent 96d7f56 commit 821f0a2

File tree

1 file changed

+9
-3
lines changed

1 file changed

+9
-3
lines changed

examples/server/server.cpp

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1558,6 +1558,7 @@ struct llama_server_context
15581558
void process_tasks()
15591559
{
15601560
std::unique_lock<std::mutex> lock(mutex_tasks);
1561+
std::vector<task_server> deferred_tasks;
15611562
while (!queue_tasks.empty())
15621563
{
15631564
task_server task = queue_tasks.front();
@@ -1568,9 +1569,8 @@ struct llama_server_context
15681569
llama_client_slot *slot = get_slot(json_value(task.data, "slot_id", -1));
15691570
if (slot == nullptr)
15701571
{
1571-
LOG_TEE("slot unavailable\n");
1572-
// send error result
1573-
send_error(task, "slot unavailable");
1572+
// if no slot is available, we defer this task for processing later
1573+
deferred_tasks.push_back(task);
15741574
break;
15751575
}
15761576

@@ -1616,6 +1616,12 @@ struct llama_server_context
16161616
}
16171617
}
16181618

1619+
// add all the deferred tasks back to the queue
1620+
for (task_server &task : deferred_tasks)
1621+
{
1622+
queue_tasks.push_back(task);
1623+
}
1624+
16191625
// remove finished multitasks from the queue of multitasks, and add the corresponding result to the result queue
16201626
std::vector<task_result> agg_results;
16211627
auto queue_iterator = queue_multitasks.begin();

0 commit comments

Comments
 (0)