1
1
import warnings
2
+ from typing import Optional
2
3
3
4
import pytest
4
5
from PIL import Image
5
6
6
7
from vllm .assets .image import ImageAsset
7
8
from vllm .config import ModelConfig
8
- from vllm .entrypoints .chat_utils import parse_chat_messages
9
+ from vllm .entrypoints .chat_utils import (parse_chat_messages ,
10
+ parse_chat_messages_futures )
11
+ from vllm .multimodal import MultiModalDataDict
9
12
from vllm .multimodal .utils import encode_image_base64
10
13
from vllm .transformers_utils .tokenizer_group import TokenizerGroup
11
14
@@ -42,10 +45,28 @@ def image_url():
42
45
return f"data:image/jpeg;base64,{ base64 } "
43
46
44
47
45
- @pytest .mark .asyncio
46
- async def test_parse_chat_messages_with_image_url (phi3v_model_config ,
47
- phi3v_tokenizer , image_url ):
48
- conversation , mm_future = parse_chat_messages ([{
48
def _assert_mm_data_is_image_input(
    mm_data: Optional[MultiModalDataDict],
    image_count: int,
) -> None:
    """Assert that ``mm_data`` carries only image input of the expected size.

    The mapping must be present and have "image" as its sole key. When
    ``image_count`` is exactly 1 the stored value must be a single
    ``Image.Image``; for any other count it must be a list whose length
    equals ``image_count``.
    """
    assert mm_data is not None
    assert {"image"} == set(mm_data.keys())

    images = mm_data.get("image")
    assert images is not None

    # Handle the list form first and bail out early; the single-image form
    # is the only case left afterwards.
    if image_count != 1:
        assert isinstance(images, list) and len(images) == image_count
        return

    assert isinstance(images, Image.Image)
62
+
63
+
64
+ def test_parse_chat_messages_single_image (
65
+ phi3v_model_config ,
66
+ phi3v_tokenizer ,
67
+ image_url ,
68
+ ):
69
+ conversation , mm_data = parse_chat_messages ([{
49
70
"role" :
50
71
"user" ,
51
72
"content" : [{
@@ -63,15 +84,42 @@ async def test_parse_chat_messages_with_image_url(phi3v_model_config,
63
84
"role" : "user" ,
64
85
"content" : "<|image_1|>\n What's in the image?"
65
86
}]
66
- mm_data = await mm_future
67
- assert set (mm_data .keys ()) == {"image" }
68
- assert isinstance (mm_data ["image" ], Image .Image )
87
+ _assert_mm_data_is_image_input (mm_data , 1 )
69
88
70
89
71
90
@pytest .mark .asyncio
72
- async def test_parse_chat_messages_multiple_images (phi3v_model_config ,
73
- phi3v_tokenizer , image_url ):
74
- conversation , mm_future = parse_chat_messages ([{
91
async def test_parse_chat_messages_single_image_async(
    phi3v_model_config,
    phi3v_tokenizer,
    image_url,
):
    """Check the futures-based parser on a user message with one image.

    The message holds one image URL part followed by one text part; the
    parsed conversation and the awaited multimodal data are both verified.
    """
    image_part = {"type": "image_url", "image_url": {"url": image_url}}
    text_part = {"type": "text", "text": "What's in the image?"}
    messages = [{"role": "user", "content": [image_part, text_part]}]

    conversation, mm_future = parse_chat_messages_futures(
        messages,
        phi3v_model_config,
        phi3v_tokenizer,
    )

    expected_conversation = [{
        "role": "user",
        "content": "<|image_1|>\n What's in the image?",
    }]
    assert conversation == expected_conversation

    # The multimodal data is only available once the future is awaited.
    mm_data = await mm_future
    _assert_mm_data_is_image_input(mm_data, 1)
115
+
116
+
117
+ def test_parse_chat_messages_multiple_images (
118
+ phi3v_model_config ,
119
+ phi3v_tokenizer ,
120
+ image_url ,
121
+ ):
122
+ conversation , mm_data = parse_chat_messages ([{
75
123
"role" :
76
124
"user" ,
77
125
"content" : [{
@@ -96,15 +144,49 @@ async def test_parse_chat_messages_multiple_images(phi3v_model_config,
96
144
"content" :
97
145
"<|image_1|>\n <|image_2|>\n What's in these images?"
98
146
}]
99
- mm_data = await mm_future
100
- assert set (mm_data .keys ()) == {"image" }
101
- assert len (mm_data ["image" ]) == 2
147
+ _assert_mm_data_is_image_input (mm_data , 2 )
102
148
103
149
104
150
@pytest .mark .asyncio
105
- async def test_parse_chat_messages_placeholder_already_in_prompt (
106
- phi3v_model_config , phi3v_tokenizer , image_url ):
107
- conversation , mm_future = parse_chat_messages ([{
151
async def test_parse_chat_messages_multiple_images_async(
    phi3v_model_config,
    phi3v_tokenizer,
    image_url,
):
    """Check the futures-based parser on a user message with two images.

    The message holds two image URL parts (same URL) followed by one text
    part; the parsed conversation and the awaited multimodal data are both
    verified.
    """
    # Build a distinct dict per image part so the parser never sees the same
    # object twice.
    image_parts = [{
        "type": "image_url",
        "image_url": {
            "url": image_url
        }
    } for _ in range(2)]
    text_part = {"type": "text", "text": "What's in these images?"}
    messages = [{"role": "user", "content": [*image_parts, text_part]}]

    conversation, mm_future = parse_chat_messages_futures(
        messages,
        phi3v_model_config,
        phi3v_tokenizer,
    )

    expected_conversation = [{
        "role": "user",
        "content": "<|image_1|>\n <|image_2|>\n What's in these images?",
    }]
    assert conversation == expected_conversation

    # The multimodal data is only available once the future is awaited.
    mm_data = await mm_future
    _assert_mm_data_is_image_input(mm_data, 2)
182
+
183
+
184
+ def test_parse_chat_messages_placeholder_already_in_prompt (
185
+ phi3v_model_config ,
186
+ phi3v_tokenizer ,
187
+ image_url ,
188
+ ):
189
+ conversation , mm_data = parse_chat_messages ([{
108
190
"role" :
109
191
"user" ,
110
192
"content" : [{
@@ -131,15 +213,15 @@ async def test_parse_chat_messages_placeholder_already_in_prompt(
131
213
"content" :
132
214
"What's in <|image_1|> and how does it compare to <|image_2|>?"
133
215
}]
134
- mm_data = await mm_future
135
- assert set (mm_data .keys ()) == {"image" }
136
- assert len (mm_data ["image" ]) == 2
216
+ _assert_mm_data_is_image_input (mm_data , 2 )
137
217
138
218
139
- @pytest .mark .asyncio
140
- async def test_parse_chat_messages_placeholder_one_already_in_prompt (
141
- phi3v_model_config , phi3v_tokenizer , image_url ):
142
- conversation , mm_future = parse_chat_messages ([{
219
+ def test_parse_chat_messages_placeholder_one_already_in_prompt (
220
+ phi3v_model_config ,
221
+ phi3v_tokenizer ,
222
+ image_url ,
223
+ ):
224
+ conversation , mm_data = parse_chat_messages ([{
143
225
"role" :
144
226
"user" ,
145
227
"content" : [{
@@ -167,15 +249,15 @@ async def test_parse_chat_messages_placeholder_one_already_in_prompt(
167
249
"<|image_2|>\n What's in <|image_1|> and how does it compare to the "
168
250
"other one?"
169
251
}]
170
- mm_data = await mm_future
171
- assert set (mm_data .keys ()) == {"image" }
172
- assert len (mm_data ["image" ]) == 2
252
+ _assert_mm_data_is_image_input (mm_data , 2 )
173
253
174
254
175
- @pytest .mark .asyncio
176
- async def test_parse_chat_messages_multiple_images_across_messages (
177
- phi3v_model_config , phi3v_tokenizer , image_url ):
178
- conversation , mm_future = parse_chat_messages ([{
255
+ def test_parse_chat_messages_multiple_images_across_messages (
256
+ phi3v_model_config ,
257
+ phi3v_tokenizer ,
258
+ image_url ,
259
+ ):
260
+ conversation , mm_data = parse_chat_messages ([{
179
261
"role" :
180
262
"user" ,
181
263
"content" : [{
@@ -218,14 +300,14 @@ async def test_parse_chat_messages_multiple_images_across_messages(
218
300
"content" : "<|image_2|>\n What about this one?"
219
301
},
220
302
]
221
- mm_data = await mm_future
222
- assert set (mm_data .keys ()) == {"image" }
223
- assert len (mm_data ["image" ]) == 2
303
+ _assert_mm_data_is_image_input (mm_data , 2 )
224
304
225
305
226
- @pytest .mark .asyncio
227
- async def test_parse_chat_messages_rejects_too_many_images_in_one_message (
228
- phi3v_model_config , phi3v_tokenizer , image_url ):
306
+ def test_parse_chat_messages_rejects_too_many_images_in_one_message (
307
+ phi3v_model_config ,
308
+ phi3v_tokenizer ,
309
+ image_url ,
310
+ ):
229
311
with warnings .catch_warnings ():
230
312
warnings .filterwarnings (
231
313
"ignore" ,
@@ -259,9 +341,11 @@ async def test_parse_chat_messages_rejects_too_many_images_in_one_message(
259
341
}], phi3v_model_config , phi3v_tokenizer )
260
342
261
343
262
- @pytest .mark .asyncio
263
- async def test_parse_chat_messages_rejects_too_many_images_across_messages (
264
- phi3v_model_config , phi3v_tokenizer , image_url ):
344
+ def test_parse_chat_messages_rejects_too_many_images_across_messages (
345
+ phi3v_model_config ,
346
+ phi3v_tokenizer ,
347
+ image_url ,
348
+ ):
265
349
with warnings .catch_warnings ():
266
350
warnings .filterwarnings (
267
351
"ignore" ,
0 commit comments