@@ -659,50 +659,52 @@ async def test_completion_stream_options(client: openai.AsyncOpenAI,
     [MODEL_NAME, "zephyr-lora"],
 )
 async def test_batch_completions(client: openai.AsyncOpenAI, model_name: str):
-    # test simple list
-    batch = await client.completions.create(
-        model=model_name,
-        prompt=["Hello, my name is", "Hello, my name is"],
-        max_tokens=5,
-        temperature=0.0,
-    )
-    assert len(batch.choices) == 2
-    assert batch.choices[0].text == batch.choices[1].text
-
-    # test n = 2
-    batch = await client.completions.create(
-        model=model_name,
-        prompt=["Hello, my name is", "Hello, my name is"],
-        n=2,
-        max_tokens=5,
-        temperature=0.0,
-        extra_body=dict(
-            # NOTE: this has to be true for n > 1 in vLLM, but not necessary
-            # for official client.
-            use_beam_search=True),
-    )
-    assert len(batch.choices) == 4
-    assert batch.choices[0].text != batch.choices[
-        1].text, "beam search should be different"
-    assert batch.choices[0].text == batch.choices[
-        2].text, "two copies of the same prompt should be the same"
-    assert batch.choices[1].text == batch.choices[
-        3].text, "two copies of the same prompt should be the same"
+    # test both text and token IDs
+    for prompts in (["Hello, my name is"] * 2, [[0, 0, 0, 0, 0]] * 2):
+        # test simple list
+        batch = await client.completions.create(
+            model=model_name,
+            prompt=prompts,
+            max_tokens=5,
+            temperature=0.0,
+        )
+        assert len(batch.choices) == 2
+        assert batch.choices[0].text == batch.choices[1].text
 
-    # test streaming
-    batch = await client.completions.create(
-        model=model_name,
-        prompt=["Hello, my name is", "Hello, my name is"],
-        max_tokens=5,
-        temperature=0.0,
-        stream=True,
-    )
-    texts = [""] * 2
-    async for chunk in batch:
-        assert len(chunk.choices) == 1
-        choice = chunk.choices[0]
-        texts[choice.index] += choice.text
-    assert texts[0] == texts[1]
+        # test n = 2
+        batch = await client.completions.create(
+            model=model_name,
+            prompt=prompts,
+            n=2,
+            max_tokens=5,
+            temperature=0.0,
+            extra_body=dict(
+                # NOTE: this has to be true for n > 1 in vLLM, but not necessary
+                # for official client.
+                use_beam_search=True),
+        )
+        assert len(batch.choices) == 4
+        assert batch.choices[0].text != batch.choices[
+            1].text, "beam search should be different"
+        assert batch.choices[0].text == batch.choices[
+            2].text, "two copies of the same prompt should be the same"
+        assert batch.choices[1].text == batch.choices[
+            3].text, "two copies of the same prompt should be the same"
+
+        # test streaming
+        batch = await client.completions.create(
+            model=model_name,
+            prompt=prompts,
+            max_tokens=5,
+            temperature=0.0,
+            stream=True,
+        )
+        texts = [""] * 2
+        async for chunk in batch:
+            assert len(chunk.choices) == 1
+            choice = chunk.choices[0]
+            texts[choice.index] += choice.text
+        assert texts[0] == texts[1]
 
 
 @pytest.mark.asyncio
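
For context, a minimal standalone sketch of the same batch-completion calls the test above exercises, run against a locally served vLLM OpenAI-compatible endpoint. The base_url, api_key, and model name here are illustrative assumptions, not values taken from the test suite; only the call pattern (string prompts or token-ID prompts, batched, at temperature 0.0) mirrors the diff.

# Minimal sketch, assuming a vLLM OpenAI-compatible server is already running
# locally; base_url, api_key, and model name below are placeholders.
import asyncio

import openai


async def main() -> None:
    client = openai.AsyncOpenAI(
        base_url="http://localhost:8000/v1",  # assumed local vLLM server
        api_key="EMPTY",  # placeholder key for a local server
    )

    # As in the test, prompts may be plain strings or lists of token IDs.
    for prompts in (["Hello, my name is"] * 2, [[0, 0, 0, 0, 0]] * 2):
        batch = await client.completions.create(
            model="HuggingFaceH4/zephyr-7b-beta",  # assumed model name
            prompt=prompts,
            max_tokens=5,
            temperature=0.0,
        )
        # One choice comes back per prompt; identical prompts at
        # temperature 0.0 should produce identical completions.
        print([choice.text for choice in batch.choices])


if __name__ == "__main__":
    asyncio.run(main())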