1 file changed, +9 -3 lines changed

@@ -1558,6 +1558,7 @@ struct llama_server_context
     void process_tasks()
     {
         std::unique_lock<std::mutex> lock(mutex_tasks);
+        std::vector<task_server> deferred_tasks;
         while (!queue_tasks.empty())
         {
             task_server task = queue_tasks.front();
@@ -1568,9 +1569,8 @@ struct llama_server_context
             llama_client_slot *slot = get_slot(json_value(task.data, "slot_id", -1));
             if (slot == nullptr)
             {
-                LOG_TEE("slot unavailable\n");
-                // send error result
-                send_error(task, "slot unavailable");
+                // if no slot is available, we defer this task for processing later
+                deferred_tasks.push_back(task);
                 break;
             }

@@ -1616,6 +1616,12 @@ struct llama_server_context
             }
         }

+        // add all the deferred tasks back to the queue
+        for (task_server &task : deferred_tasks)
+        {
+            queue_tasks.push_back(task);
+        }
+
         // remove finished multitasks from the queue of multitasks, and add the corresponding result to the result queue
         std::vector<task_result> agg_results;
         auto queue_iterator = queue_multitasks.begin();
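Below is a minimal, self-contained sketch of the defer-and-requeue pattern this diff introduces: a task that cannot get a free slot is stashed in a local deferred_tasks vector instead of being rejected with an error, then pushed back onto the task queue after the processing loop so it is retried on a later pass. The types and helpers here (Task, Slot, find_free_slot, the two-slot pool) are simplified stand-ins for illustration, not the actual llama.cpp server types.

// sketch only: simplified stand-ins for the server's task/slot machinery
#include <cstdio>
#include <deque>
#include <mutex>
#include <vector>

struct Task { int id; };
struct Slot { bool busy = false; };

static std::mutex mutex_tasks;
static std::deque<Task> queue_tasks;
static std::vector<Slot> slots(2); // pretend the server only has two processing slots

// return a pointer to a free slot, or nullptr if every slot is busy
static Slot *find_free_slot()
{
    for (auto &s : slots)
    {
        if (!s.busy)
        {
            return &s;
        }
    }
    return nullptr;
}

static void process_tasks()
{
    std::unique_lock<std::mutex> lock(mutex_tasks);
    std::vector<Task> deferred_tasks;

    while (!queue_tasks.empty())
    {
        Task task = queue_tasks.front();
        queue_tasks.pop_front();

        Slot *slot = find_free_slot();
        if (slot == nullptr)
        {
            // no slot available: defer the task instead of failing the request
            deferred_tasks.push_back(task);
            break;
        }

        slot->busy = true;
        std::printf("task %d assigned to a slot\n", task.id);
    }

    // add the deferred tasks back to the queue so they are retried later
    for (Task &task : deferred_tasks)
    {
        queue_tasks.push_back(task);
    }
}

int main()
{
    // three tasks but only two slots: task 3 is deferred and requeued
    queue_tasks.push_back({1});
    queue_tasks.push_back({2});
    queue_tasks.push_back({3});

    process_tasks();
    std::printf("%zu task(s) left in the queue\n", queue_tasks.size());
    return 0;
}

The effect of the change, as far as the diff shows, is that a request arriving while all slots are busy is held and retried rather than answered immediately with a "slot unavailable" error.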