Skip to content

Commit d45cbe7

Browse files
[Bugfix] Check that number of images matches number of <|image|> tokens with mllama (#11939)
Signed-off-by: Travis Johnson <[email protected]>
1 parent 8a57940 commit d45cbe7

File tree

1 file changed

+9 -0 lines changed

1 file changed

+9 -0 lines changed

vllm/model_executor/models/mllama.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -123,6 +123,13 @@ def input_processor_for_mllama(
123123

124124
assert is_list_of(image_data, Image.Image)
125125

126+
num_image_tokens = dec_inputs['prompt_token_ids'].count(
127+
MLLAMA_IMAGE_TOKEN_ID)
128+
if num_image_tokens != len(image_data):
129+
raise ValueError(
130+
f"The number of image tokens ({num_image_tokens}) must be"
131+
f" the same as the number of images ({len(image_data)})")
132+
126133
# Since only the last group of consecutive images
127134
# are attended by the decoded tokens, we only need to
128135
# get the number of tiles for those images.
@@ -1493,6 +1500,8 @@ def convert_sparse_cross_attention_mask_to_dense(
14931500
dense_mask[seq_start + start:seq_start + end,
14941501
tile_start:tile_start + tile] = 1
14951502
tile_start += tile
1503+
assert ts != -1
1504+
assert td != 0
14961505
tile_range_for_decode.append((ts, ts + td))
14971506
seq_start += length
14981507

0 commit comments

Comments
 (0)