diff --git a/vllm/v1/core/encoder_cache_manager.py b/vllm/v1/core/encoder_cache_manager.py
index 651bc01aa5cf..13ad14e45b32 100644
--- a/vllm/v1/core/encoder_cache_manager.py
+++ b/vllm/v1/core/encoder_cache_manager.py
@@ -54,7 +54,7 @@ def free_encoder_input(self, request: Request, input_id: int) -> None:
 
     def free(self, request: Request) -> None:
         """Free all cached input ids for the request."""
-        input_ids = self.get_cached_input_ids(request)
+        input_ids = self.get_cached_input_ids(request).copy()
         for input_id in input_ids:
             self.free_encoder_input(request, input_id)
 