From 7344a384ecf17b0b8481eabbfa535b936e626410 Mon Sep 17 00:00:00 2001
From: Nick Hill
Date: Wed, 5 Mar 2025 11:29:19 -0800
Subject: [PATCH] [V1] Remove obsolete FIXME comment

This was missed when merging
https://github.com/vllm-project/vllm/pull/14169 and
https://github.com/vllm-project/vllm/pull/14159

Signed-off-by: Nick Hill
---
 vllm/v1/worker/gpu_input_batch.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/vllm/v1/worker/gpu_input_batch.py b/vllm/v1/worker/gpu_input_batch.py
index c0e9ff0286d6..2fe177ea4e12 100644
--- a/vllm/v1/worker/gpu_input_batch.py
+++ b/vllm/v1/worker/gpu_input_batch.py
@@ -298,11 +298,6 @@ def add_request(
         if sampling_params.logit_bias is not None:
             self.logit_bias[req_index] = sampling_params.logit_bias
 
-        # FIXME: this implementation is incorrect. We create this mask
-        # then apply -inf to these specific tokens, which means we never
-        # select the allowed tokens! We cannot do the reverse, since
-        # this will impact the requests that do not have allowed_token_ids.
-        # This feature is currently disabled on V1 (we reject in Processor).
         if sampling_params.allowed_token_ids:
             self.has_allowed_token_ids.add(req_id)
             if self.allowed_token_ids_mask_cpu_tensor is None: