This repository was archived by the owner on Oct 11, 2024. It is now read-only.

Commit 32d5ecc

DarkLight1337 authored and Robert Shaw committed
[CI/Build] Test both text and token IDs in batched OpenAI Completions API (vllm-project#5568)
1 parent 0393d45 commit 32d5ecc

File tree: 1 file changed (+45, −43 lines)

tests/entrypoints/test_openai_server.py

Lines changed: 45 additions & 43 deletions
@@ -659,50 +659,52 @@ async def test_completion_stream_options(client: openai.AsyncOpenAI,
     [MODEL_NAME, "zephyr-lora"],
 )
 async def test_batch_completions(client: openai.AsyncOpenAI, model_name: str):
-    # test simple list
-    batch = await client.completions.create(
-        model=model_name,
-        prompt=["Hello, my name is", "Hello, my name is"],
-        max_tokens=5,
-        temperature=0.0,
-    )
-    assert len(batch.choices) == 2
-    assert batch.choices[0].text == batch.choices[1].text
-
-    # test n = 2
-    batch = await client.completions.create(
-        model=model_name,
-        prompt=["Hello, my name is", "Hello, my name is"],
-        n=2,
-        max_tokens=5,
-        temperature=0.0,
-        extra_body=dict(
-            # NOTE: this has to be true for n > 1 in vLLM, but not necessary
-            # for official client.
-            use_beam_search=True),
-    )
-    assert len(batch.choices) == 4
-    assert batch.choices[0].text != batch.choices[
-        1].text, "beam search should be different"
-    assert batch.choices[0].text == batch.choices[
-        2].text, "two copies of the same prompt should be the same"
-    assert batch.choices[1].text == batch.choices[
-        3].text, "two copies of the same prompt should be the same"
+    # test both text and token IDs
+    for prompts in (["Hello, my name is"] * 2, [[0, 0, 0, 0, 0]] * 2):
+        # test simple list
+        batch = await client.completions.create(
+            model=model_name,
+            prompt=prompts,
+            max_tokens=5,
+            temperature=0.0,
+        )
+        assert len(batch.choices) == 2
+        assert batch.choices[0].text == batch.choices[1].text
 
-    # test streaming
-    batch = await client.completions.create(
-        model=model_name,
-        prompt=["Hello, my name is", "Hello, my name is"],
-        max_tokens=5,
-        temperature=0.0,
-        stream=True,
-    )
-    texts = [""] * 2
-    async for chunk in batch:
-        assert len(chunk.choices) == 1
-        choice = chunk.choices[0]
-        texts[choice.index] += choice.text
-    assert texts[0] == texts[1]
+        # test n = 2
+        batch = await client.completions.create(
+            model=model_name,
+            prompt=prompts,
+            n=2,
+            max_tokens=5,
+            temperature=0.0,
+            extra_body=dict(
+                # NOTE: this has to be true for n > 1 in vLLM, but not necessary
+                # for official client.
+                use_beam_search=True),
+        )
+        assert len(batch.choices) == 4
+        assert batch.choices[0].text != batch.choices[
+            1].text, "beam search should be different"
+        assert batch.choices[0].text == batch.choices[
+            2].text, "two copies of the same prompt should be the same"
+        assert batch.choices[1].text == batch.choices[
+            3].text, "two copies of the same prompt should be the same"
+
+        # test streaming
+        batch = await client.completions.create(
+            model=model_name,
+            prompt=prompts,
+            max_tokens=5,
+            temperature=0.0,
+            stream=True,
+        )
+        texts = [""] * 2
+        async for chunk in batch:
+            assert len(chunk.choices) == 1
+            choice = chunk.choices[0]
+            texts[choice.index] += choice.text
+        assert texts[0] == texts[1]
 
 
 @pytest.mark.asyncio
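
For context, the Completions API accepts prompts either as plain strings or as lists of token IDs, and this commit makes the batched test exercise both forms. Below is a minimal standalone sketch of an equivalent batched token-ID request against a vLLM OpenAI-compatible server assumed to be running locally; the base URL, API key, and model name are placeholders, not values taken from this commit.

import asyncio

import openai

# Placeholder endpoint: assumes a vLLM OpenAI-compatible server is already
# running locally; the API key is unused by such a local server.
client = openai.AsyncOpenAI(base_url="http://localhost:8000/v1",
                            api_key="EMPTY")


async def main():
    # Batched prompt passed as token IDs rather than text, mirroring the test.
    batch = await client.completions.create(
        model="your-model-name",  # placeholder model name
        prompt=[[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]],
        max_tokens=5,
        temperature=0.0,
    )
    for choice in batch.choices:
        print(choice.index, choice.text)


asyncio.run(main())

With greedy sampling (temperature=0.0), the two identical prompts should produce identical completions, which is exactly what the test asserts for both the text and token-ID variants.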

0 commit comments
