diff --git a/docs/source/models/generative_models.md b/docs/source/models/generative_models.md
index f31e5715d175..06daa04f2dea 100644
--- a/docs/source/models/generative_models.md
+++ b/docs/source/models/generative_models.md
@@ -54,14 +54,16 @@ The {class}`~vllm.LLM.beam_search` method implements [beam search](https://huggi
 For example, to search using 5 beams and output at most 50 tokens:
 
 ```python
+from vllm import LLM
+from vllm.sampling_params import BeamSearchParams
+
 llm = LLM(model="facebook/opt-125m")
 params = BeamSearchParams(beam_width=5, max_tokens=50)
-outputs = llm.generate("Hello, my name is", params)
+outputs = llm.beam_search([{"prompt": "Hello, my name is "}], params)
 
 for output in outputs:
-    prompt = output.prompt
-    generated_text = output.outputs[0].text
-    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+    generated_text = output.sequences[0].text
+    print(f"Generated text: {generated_text!r}")
 ```
 
 ### `LLM.chat`
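For review convenience, here is the post-patch example reassembled as one self-contained snippet. This is a sketch that assumes the `LLM.beam_search` signature and the `output.sequences[0].text` access pattern exactly as they appear in the diff above; the comments are added for explanation and are not part of the docs change.

```python
from vllm import LLM
from vllm.sampling_params import BeamSearchParams

# Load a small model for the example.
llm = LLM(model="facebook/opt-125m")

# Search with 5 beams, generating at most 50 tokens.
params = BeamSearchParams(beam_width=5, max_tokens=50)

# Unlike LLM.generate, beam_search takes a list of prompt dicts
# (per the diff) rather than a bare prompt string.
outputs = llm.beam_search([{"prompt": "Hello, my name is "}], params)

for output in outputs:
    # Each result exposes its beams via .sequences; the example
    # prints the text of the first one.
    generated_text = output.sequences[0].text
    print(f"Generated text: {generated_text!r}")
```

Note that the two added imports make the snippet runnable on its own: `BeamSearchParams` is imported from `vllm.sampling_params`, as the diff shows.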