From 86833d10ac56f09428caaf65eee7bd70c55a7973 Mon Sep 17 00:00:00 2001
From: Mark McLoughlin
Date: Tue, 4 Mar 2025 11:20:56 -0500
Subject: [PATCH] [V1][Metrics] Fix traceback with preemptions+LoRA

Reported by Varun.

```
EngineCore output handler hit an error: 'cmpl-105f50b72108493bb3ffdb4f6fc6ea72-0'
Traceback (most recent call last):
  File "/vllm-project/vllm/vllm/v1/engine/async_llm.py", line 274, in _run_output_handler
    processed_outputs = self.output_processor.process_outputs(
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/vllm-project/vllm/vllm/v1/engine/output_processor.py", line 269, in process_outputs
    self._update_stats_from_output(req_state, engine_core_output,
  File "/vllm-project/vllm/vllm/v1/engine/output_processor.py", line 344, in _update_stats_from_output
    iteration_stats.update_from_output(engine_core_output,
  File "/vllm-project/vllm/vllm/v1/metrics/stats.py", line 121, in update_from_output
    self.update_from_events(output.request_id, output.events,
  File "/vllm-project/vllm/vllm/v1/metrics/stats.py", line 150, in update_from_events
    LoRARequestStates.scheduled_request(lora_stats, req_id)
  File "/vllm-project/vllm/vllm/v1/metrics/stats.py", line 224, in scheduled_request
    lora_stats.waiting_requests.remove(request_id)
KeyError: 'cmpl-105f50b72108493bb3ffdb4f6fc6ea72-0'
```

Signed-off-by: Mark McLoughlin
---
 vllm/v1/metrics/stats.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/vllm/v1/metrics/stats.py b/vllm/v1/metrics/stats.py
index 14ec7d2d7463..36317fc9aeee 100644
--- a/vllm/v1/metrics/stats.py
+++ b/vllm/v1/metrics/stats.py
@@ -150,6 +150,7 @@ def update_from_events(self, req_id: str, events: list["EngineCoreEvent"],
                 LoRARequestStates.scheduled_request(lora_stats, req_id)
             elif event.type == EngineCoreEventType.PREEMPTED:
                 self.num_preempted_reqs += 1
+                LoRARequestStates.preempted_request(lora_stats, req_id)
 
     def update_from_finished_request(self, finish_reason: "FinishReason",
                                      num_prompt_tokens: int,
@@ -224,6 +225,13 @@ def scheduled_request(lora_stats: Optional[LoRAStats], request_id: str):
         lora_stats.waiting_requests.remove(request_id)
         lora_stats.running_requests.add(request_id)
 
+    @staticmethod
+    def preempted_request(lora_stats: Optional[LoRAStats], request_id: str):
+        if lora_stats is None:
+            return
+        lora_stats.running_requests.remove(request_id)
+        lora_stats.waiting_requests.add(request_id)
+
     def update_iteration_stats(self,
                                iteration_stats: Optional[IterationStats]):
         if iteration_stats is None: