We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent: 8a8927b. Commit: ec7b892. Copy full SHA for ec7b892
vllm/v1/worker/gpu_model_runner.py
@@ -2060,7 +2060,8 @@ def execute_model(
 
 # Apply structured output bitmasks if present
 if scheduler_output.grammar_bitmask is not None:
-    self.apply_grammar_bitmask(scheduler_output, logits)
+    apply_grammar_bitmask(scheduler_output, self.input_batch,
+                          logits, self.device)
 
 with record_function_or_nullcontext("Sample"):
     sampler_output = self._sample(logits, spec_decode_metadata)
0 commit comments