vllm/entrypoints/llm.py
+3 −1 (3 additions, 1 deletion)
@@ -396,6 +396,7 @@ def beam_search(
         beam_width: int,
         max_tokens: int,
         ignore_eos: bool = False,
+        temperature: float = 0.0,
     ) -> List[BeamSearchOutput]:
         """
         Generate sequences using beam search.
@@ -405,6 +406,7 @@ def beam_search(
                 of token IDs.
            beam_width: The number of beams to keep at each step.
            max_tokens: The max number of tokens to generate for each prompt.
+           temperature: The temperature to use for generation.

        TODO: how does beam search work together with length penalty, frequency
        penalty, and stopping criteria, etc.?
@@ -416,7 +418,7 @@ def beam_search(
        # at https://github.com/huggingface/transformers/blob/e15687fffe5c9d20598a19aeab721ae0a7580f8a/src/transformers/generation/beam_search.py#L534 # noqa
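
For context, a minimal usage sketch of the new parameter follows. It is not part of this PR: the model name, the example prompt, and the positional prompts argument are assumptions, since only part of the beam_search signature appears in the hunks above.

# Usage sketch under assumptions: the model name is a placeholder, and
# beam_search accepts a list of prompts as its first argument (that part of
# the signature is not shown in the hunks above).
from vllm import LLM

llm = LLM(model="facebook/opt-125m")

# temperature is the parameter added in this diff; its default of 0.0 is
# meant to match the pre-PR behavior.
outputs = llm.beam_search(
    ["The capital of France is"],
    beam_width=4,
    max_tokens=16,
    temperature=0.5,
)

# Per the diff, the return type is List[BeamSearchOutput], one entry per prompt.
for output in outputs:
    print(output)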