diff --git a/vllm/v1/worker/gpu_input_batch.py b/vllm/v1/worker/gpu_input_batch.py index 5db843e99d6a..bc1186e5feb7 100644 --- a/vllm/v1/worker/gpu_input_batch.py +++ b/vllm/v1/worker/gpu_input_batch.py @@ -251,7 +251,6 @@ def __init__( # Cached reference to the GPU tensor of previously sampled tokens self.prev_sampled_token_ids: Optional[torch.Tensor] = None - self.prev_sampled_token_ids_invalid_indices: Optional[set[int]] = None self.prev_req_id_to_index: Optional[dict[str, int]] = None @property diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 0a5b56d95224..b5687c75fc3e 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -2305,9 +2305,6 @@ def _bookkeeping_sync( # These will be copied into input_ids in the next step # when preparing inputs. self.input_batch.prev_sampled_token_ids = sampled_token_ids - self.input_batch.prev_sampled_token_ids_invalid_indices = ( - invalid_req_indices_set - ) self.input_batch.prev_req_id_to_index = { req_id: i for i, req_id in enumerate(self.input_batch.req_ids)