@@ -300,6 +300,7 @@ def add_request(
300
300
def abort_request(self, request_id: Union[str, Iterable[str]]) -> None:
    """Abort one or more requests and drop their engine-side bookkeeping.

    Args:
        request_id: A single request id, or an iterable of request ids.

    The scheduler is asked to finish the requests with
    ``RequestStatus.FINISHED_ABORTED``; afterwards each id is released via
    ``_free_request``.
    """
    if isinstance(request_id, str):
        request_ids = (request_id,)
    else:
        # Materialize once: the iterable may be a one-shot iterator, and it
        # is consumed both by the scheduler and by the cleanup loop below.
        request_ids = tuple(request_id)

    # The scheduler call already accepted the str-or-iterable union, so an
    # iterable of ids is a valid argument here.
    self.scheduler.finish_requests(request_ids,
                                   RequestStatus.FINISHED_ABORTED)

    # _free_request takes exactly one id and uses it as a dict key; handing
    # it the whole collection would raise TypeError for a list (unhashable)
    # or silently free nothing for a tuple.
    for req_id in request_ids:
        self._free_request(req_id)
303
304
304
305
def get_num_unfinished_requests (self ) -> int :
305
306
"""Gets the number of unfinished requests."""
@@ -361,6 +362,11 @@ def recv_from_detokenizer(self) -> List[RequestOutput]:
361
362
num_reqs = len (detokenizer_output .req_ids )
362
363
for i in range (num_reqs ):
363
364
req_id = detokenizer_output .req_ids [i ]
365
+ if req_id not in self .requests :
366
+ # The request has been aborted while the detokenizer was
367
+ # processing the outputs.
368
+ continue
369
+
364
370
req = self .requests [req_id ]
365
371
req .output_text += detokenizer_output .detokenized_texts [i ]
366
372
@@ -373,9 +379,7 @@ def recv_from_detokenizer(self) -> List[RequestOutput]:
373
379
req_outputs .append (req_output )
374
380
375
381
if finished :
376
- del self .requests [req_id ]
377
- del self .num_lagged_steps [req_id ]
378
- del self .request_outputs [req_id ]
382
+ self ._free_request (req_id )
379
383
return req_outputs
380
384
381
385
def terminate_detokenizer (self ) -> None :
@@ -440,6 +444,11 @@ def _make_request_output(
440
444
req_output .finished = finished
441
445
return req_output
442
446
447
def _free_request(self, request_id: str) -> None:
    """Forget every per-request record held for ``request_id``.

    Removes the id from ``self.requests``, ``self.num_lagged_steps`` and
    ``self.request_outputs``, in that order. An id that is absent from a
    table is ignored, so calling this for an unknown or already-freed
    request is a no-op.
    """
    for table_name in ("requests", "num_lagged_steps", "request_outputs"):
        # pop() with a default never raises on a missing key.
        getattr(self, table_name).pop(request_id, None)
451
+
443
452
def check_health (self ) -> None :
444
453
if self .tokenizer :
445
454
self .tokenizer .check_health ()
0 commit comments