File tree Expand file tree Collapse file tree 1 file changed +9
-0
lines changed
vllm/model_executor/models Expand file tree Collapse file tree 1 file changed +9
-0
lines changed Original file line number Diff line number Diff line change @@ -123,6 +123,13 @@ def input_processor_for_mllama(
123
123
124
124
assert is_list_of (image_data , Image .Image )
125
125
126
+ num_image_tokens = dec_inputs ['prompt_token_ids' ].count (
127
+ MLLAMA_IMAGE_TOKEN_ID )
128
+ if num_image_tokens != len (image_data ):
129
+ raise ValueError (
130
+ f"The number of image tokens ({ num_image_tokens } ) must be"
131
+ f" the same as the number of images ({ len (image_data )} )" )
132
+
126
133
# Since only the last group of consecutive images
127
134
# are attended by the decoded tokens, we only need to
128
135
# get the number of tiles for those images.
@@ -1493,6 +1500,8 @@ def convert_sparse_cross_attention_mask_to_dense(
1493
1500
dense_mask [seq_start + start :seq_start + end ,
1494
1501
tile_start :tile_start + tile ] = 1
1495
1502
tile_start += tile
1503
+ assert ts != - 1
1504
+ assert td != 0
1496
1505
tile_range_for_decode .append ((ts , ts + td ))
1497
1506
seq_start += length
1498
1507
You can’t perform that action at this time.
0 commit comments