24 changes: 22 additions & 2 deletions vllm/inputs/preprocess.py
@@ -254,6 +254,7 @@ def _process_multimodal(
mm_data: MultiModalDataDict,
mm_processor_kwargs: Optional[Mapping[str, object]],
lora_request: Optional[LoRARequest],
return_mm_hashes: bool = False,
) -> MultiModalInputs:
"""
Apply the model's multi-modal processor to a multi-modal prompt,
@@ -274,14 +275,16 @@ def _process_multimodal(
if mm_processor_kwargs is None:
mm_processor_kwargs = {}

return mm_processor.apply(prompt, mm_data, mm_processor_kwargs)
return mm_processor.apply(prompt, mm_data, mm_processor_kwargs,
return_mm_hashes)

async def _process_multimodal_async(
self,
prompt: Union[str, List[int]],
mm_data: MultiModalDataDict,
mm_processor_kwargs: Optional[Mapping[str, object]],
lora_request: Optional[LoRARequest],
return_mm_hashes: bool = False,
) -> MultiModalInputs:
"""Async version of :meth:`_process_multimodal`."""
# At the moment one model (PrithviGeoSpatialMAE) requires to be
@@ -299,13 +302,15 @@ async def _process_multimodal_async(
if mm_processor_kwargs is None:
mm_processor_kwargs = {}

return mm_processor.apply(prompt, mm_data, mm_processor_kwargs)
return mm_processor.apply(prompt, mm_data, mm_processor_kwargs,
return_mm_hashes)

def _prompt_to_llm_inputs(
self,
prompt: SingletonPrompt,
request_id: str,
lora_request: Optional[LoRARequest] = None,
return_mm_hashes: bool = False,
) -> SingletonInputs:
"""
Extract the singleton inputs from a prompt.
@@ -315,6 +320,7 @@ def _prompt_to_llm_inputs(
* request_id
* prompt: single encoder or decoder input prompt
* lora_request: this is only valid for decoder prompts
* return_mm_hashes: whether to return multimodal hashes

Returns:

@@ -349,6 +355,7 @@ def _prompt_to_llm_inputs(
multi_modal_data,
mm_processor_kwargs,
lora_request=lora_request,
return_mm_hashes=return_mm_hashes,
)

return token_inputs(
@@ -695,6 +702,7 @@ def _process_decoder_only_prompt(
request_id: str,
lora_request: Optional[LoRARequest] = None,
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
return_mm_hashes: bool = False,
) -> DecoderOnlyInputs:
"""
For decoder-only models:
@@ -706,6 +714,7 @@
* request_id
* lora_request
* prompt_adapter_request
* return_mm_hashes

Returns:

@@ -729,6 +738,7 @@ async def _process_decoder_only_prompt_async(
request_id: str,
lora_request: Optional[LoRARequest] = None,
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
return_mm_hashes: bool = False,
) -> DecoderOnlyInputs:
"""Async version of :meth:`_process_decoder_only_prompt`."""
prompt_comps = await self._prompt_to_llm_inputs_async(
@@ -748,9 +758,13 @@ def preprocess(
request_id: str,
lora_request: Optional[LoRARequest] = None,
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
return_mm_hashes: bool = False,
) -> ProcessorInputs:
"""Preprocess the input prompt."""
if self.model_config.is_encoder_decoder:
assert not return_mm_hashes, (
"Multimodal hashes for encoder-decoder models should not be ",
"returned until they are supported on vLLM V1.")
# Encoder-decoder model requires special mapping of
# input prompts to encoder & decoder
return self._process_encoder_decoder_prompt(
@@ -768,6 +782,7 @@ def preprocess(
request_id=request_id,
lora_request=lora_request,
prompt_adapter_request=prompt_adapter_request,
return_mm_hashes=return_mm_hashes,
)

async def preprocess_async(
@@ -776,9 +791,13 @@ async def preprocess_async(
request_id: str,
lora_request: Optional[LoRARequest] = None,
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
return_mm_hashes: bool = False,
) -> ProcessorInputs:
"""Async version of :meth:`preprocess`."""
if self.model_config.is_encoder_decoder:
assert not return_mm_hashes, (
"Multimodal hashes for encoder-decoder models should not be ",
"returned until they are supported on vLLM V1.")
# Encoder-decoder model requires special mapping of
# input prompts to encoder & decoder
return await self._process_encoder_decoder_prompt_async(
@@ -796,4 +815,5 @@ async def preprocess_async(
request_id=request_id,
lora_request=lora_request,
prompt_adapter_request=prompt_adapter_request,
return_mm_hashes=return_mm_hashes,
)
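
Taken together, the preprocess.py changes thread a single boolean, return_mm_hashes, from preprocess()/preprocess_async() through _process_decoder_only_prompt() and _prompt_to_llm_inputs() down to mm_processor.apply(). The sketch below illustrates that flag-threading pattern with toy stand-in classes; names such as ToyPreprocessor and ToyMultiModalProcessor are hypothetical and are not the real vLLM classes.

# Illustrative sketch only: how a return_mm_hashes flag can be threaded from a
# top-level preprocess() call down to the multimodal processor. Toy classes,
# not vLLM's actual InputPreprocessor / multimodal processor API.
from typing import Any, Mapping, Optional


class ToyMultiModalProcessor:

    def apply(
        self,
        prompt: str,
        mm_data: Mapping[str, list],
        mm_processor_kwargs: Mapping[str, object],
        return_mm_hashes: bool = False,
    ) -> dict[str, Any]:
        out: dict[str, Any] = {"prompt": prompt}
        if return_mm_hashes:
            # Real hashing is elided; each item would get a content-based hash.
            out["mm_hashes"] = {
                modality: [f"<hash-{i}>" for i in range(len(items))]
                for modality, items in mm_data.items()
            }
        return out


class ToyPreprocessor:

    def __init__(self) -> None:
        self.mm_processor = ToyMultiModalProcessor()

    def _process_multimodal(
        self,
        prompt: str,
        mm_data: Mapping[str, list],
        mm_processor_kwargs: Optional[Mapping[str, object]],
        return_mm_hashes: bool = False,
    ) -> dict[str, Any]:
        # Forward the flag unchanged, mirroring the diff above.
        return self.mm_processor.apply(prompt, mm_data,
                                       mm_processor_kwargs or {},
                                       return_mm_hashes)

    def preprocess(
        self,
        prompt: str,
        mm_data: Optional[Mapping[str, list]] = None,
        return_mm_hashes: bool = False,
    ) -> dict[str, Any]:
        if mm_data:
            return self._process_multimodal(
                prompt, mm_data, None, return_mm_hashes=return_mm_hashes)
        return {"prompt": prompt}


if __name__ == "__main__":
    pre = ToyPreprocessor()
    print(pre.preprocess("USER: <image>\nDescribe the image.",
                         {"image": [object()]},
                         return_mm_hashes=True))
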
4 changes: 3 additions & 1 deletion vllm/model_executor/models/llava.py
@@ -767,6 +767,7 @@ def apply(
prompt: Union[str, list[int]],
mm_data: MultiModalDataDict,
hf_processor_mm_kwargs: Mapping[str, object],
return_mm_hashes: bool = False,
) -> MultiModalInputs:
hf_config = self.info.get_hf_config()
image_token_id = hf_config.image_token_index
@@ -777,7 +778,8 @@
image_height=-1,
)

result = super().apply(prompt, mm_data, hf_processor_mm_kwargs)
result = super().apply(prompt, mm_data, hf_processor_mm_kwargs,
return_mm_hashes)

mm_items = self._to_mm_items(mm_data)
mm_item_counts = mm_items.get_all_counts()
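
The per-model changes in llava.py, minicpmv.py, and mllama.py all follow the same pattern: the overriding apply() gains a return_mm_hashes parameter and forwards it positionally to super().apply(), so the hashes survive any model-specific post-processing. A hedged sketch of that override pattern; the class names below are placeholders, not the real processors.

# Sketch of the subclass pattern used by the per-model processors: accept the
# new flag and forward it to the base implementation unchanged.
from typing import Any


class BaseProcessor:

    def apply(self, prompt, mm_data, hf_processor_mm_kwargs,
              return_mm_hashes: bool = False) -> dict[str, Any]:
        result: dict[str, Any] = {"prompt": prompt}
        if return_mm_hashes:
            result["mm_hashes"] = {modality: [] for modality in mm_data}
        return result


class ModelSpecificProcessor(BaseProcessor):

    def apply(self, prompt, mm_data, hf_processor_mm_kwargs,
              return_mm_hashes: bool = False) -> dict[str, Any]:
        # Forward the flag so the base class computes hashes when requested.
        result = super().apply(prompt, mm_data, hf_processor_mm_kwargs,
                               return_mm_hashes)
        # ...model-specific adjustments (placeholders, token counts) go here...
        return result
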
4 changes: 3 additions & 1 deletion vllm/model_executor/models/minicpmv.py
@@ -780,6 +780,7 @@ def apply(
prompt: Union[str, List[int]],
mm_data: MultiModalDataDict,
hf_processor_mm_kwargs: Mapping[str, object],
return_mm_hashes: bool = False,
) -> MultiModalInputs:
supported_mm_modalities = self.info.get_supported_mm_modalities()
if isinstance(prompt, list):
@@ -791,7 +792,8 @@
[index for index, m in enumerate(matches) if m == modality])
for modality in supported_mm_modalities
}
result = super().apply(prompt, mm_data, hf_processor_mm_kwargs)
result = super().apply(prompt, mm_data, hf_processor_mm_kwargs,
return_mm_hashes)
# Exclude <image_id>x</image_id> from placeholders
if "image" in result["mm_placeholders"] and \
self.info.get_model_version() == (2, 6):
4 changes: 3 additions & 1 deletion vllm/model_executor/models/mllama.py
@@ -175,8 +175,10 @@ def apply(
prompt: Union[str, list[int]],
mm_data: MultiModalDataDict,
hf_processor_mm_kwargs: Mapping[str, object],
return_mm_hashes: bool = False,
) -> MultiModalEncDecInputs:
mm_inputs = super().apply(prompt, mm_data, hf_processor_mm_kwargs)
mm_inputs = super().apply(prompt, mm_data, hf_processor_mm_kwargs,
return_mm_hashes)

# Check that the number of image tokens in the decoder prompt matches
# the number of images provided in mm_data
1 change: 1 addition & 0 deletions vllm/model_executor/models/prithvi_geospatial_mae.py
@@ -93,6 +93,7 @@ def apply(
prompt: Union[str, list[int]],
mm_data: MultiModalDataDict,
hf_processor_mm_kwargs: Mapping[str, object],
return_mm_hashes: bool = False,
) -> MultiModalInputs:
mm_kwargs = {}

8 changes: 5 additions & 3 deletions vllm/multimodal/processing.py
@@ -14,7 +14,6 @@
from transformers import BatchFeature, PretrainedConfig, ProcessorMixin
from typing_extensions import assert_never

import vllm.envs as envs
from vllm.inputs import InputProcessingContext
from vllm.logger import init_logger
from vllm.transformers_utils.tokenizer import (AnyTokenizer, decode_tokens,
@@ -1435,6 +1434,7 @@ def apply(
prompt: Union[str, list[int]],
mm_data: MultiModalDataDict,
hf_processor_mm_kwargs: Mapping[str, object],
return_mm_hashes: bool = False,
) -> MultiModalInputs:
"""
Process multi-modal inputs to be used in vLLM.
@@ -1451,11 +1451,11 @@
"""
mm_items = self._to_mm_items(mm_data)

# Create MM hashes (only used in V1)
# Create MM hashes to be returned (only used in V1)
# TODO: Use these hash keys for caching operations in apply_hf_processor
# instead of rehashing.

if envs.VLLM_USE_V1:
if return_mm_hashes:
model_id = self.info.model_id
mm_hashes = {
modality: [
@@ -1554,6 +1554,7 @@ def apply(
prompt: Union[str, list[int]],
mm_data: MultiModalDataDict,
hf_processor_mm_kwargs: Mapping[str, object],
return_mm_hashes: bool = False,
) -> MultiModalEncDecInputs:
"""
Process multi-modal inputs to be used in vLLM.
@@ -1567,6 +1568,7 @@
encoder_prompt,
mm_data,
hf_processor_mm_kwargs,
return_mm_hashes,
)

tokenizer = self.info.get_tokenizer()
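
In processing.py the gate changes from a global envs.VLLM_USE_V1 check to the per-call return_mm_hashes flag, so hash computation is now driven by the caller rather than by an environment variable, and the now-unused vllm.envs import is dropped. The hashes are keyed on the model ID plus each multimodal item; the snippet below is only a rough, hedged approximation of that idea using hashlib, not vLLM's actual hasher.

# Rough approximation of content-based multimodal hashing: combine the model
# ID, the modality, and the raw item bytes into a stable digest. This is an
# illustration of the concept, not vLLM's implementation.
import hashlib
from typing import Mapping, Sequence


def hash_mm_items(
    model_id: str,
    mm_items: Mapping[str, Sequence[bytes]],
) -> dict[str, list[str]]:
    hashes: dict[str, list[str]] = {}
    for modality, items in mm_items.items():
        hashes[modality] = []
        for item in items:
            h = hashlib.sha256()
            h.update(model_id.encode("utf-8"))
            h.update(modality.encode("utf-8"))
            h.update(item)
            hashes[modality].append(h.hexdigest())
    return hashes


# Example: two images for the same model get two independent digests.
print(hash_mm_items("llava-hf/llava-1.5-7b-hf",
                    {"image": [b"<png bytes 1>", b"<png bytes 2>"]}))
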
1 change: 1 addition & 0 deletions vllm/v1/engine/processor.py
@@ -131,6 +131,7 @@ def process_inputs(
request_id=request_id,
lora_request=lora_request,
prompt_adapter_request=prompt_adapter_request,
return_mm_hashes=self.use_hash,
)
eos_token_id = self.input_preprocessor.get_eos_token_id(lora_request)

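
Finally, the V1 engine processor becomes the caller that decides whether hashes are needed, passing return_mm_hashes=self.use_hash into preprocess(). A short, hypothetical sketch of how downstream code might consume the returned hashes as cache keys; the cache shown here is an assumption for illustration, not vLLM's implementation.

# Hypothetical consumer of mm_hashes: reuse previously processed multimodal
# inputs when an item with the same content hash is seen again. Not vLLM code.
from typing import Any, Callable

_mm_cache: dict[str, Any] = {}


def get_or_process(mm_hash: str, process_fn: Callable[[], Any]) -> Any:
    # Only run the expensive processing once per unique content hash.
    if mm_hash not in _mm_cache:
        _mm_cache[mm_hash] = process_fn()
    return _mm_cache[mm_hash]


# Usage: keyed by a hash returned from the preprocessor.
features = get_or_process("deadbeef" * 8, lambda: {"pixel_values": "..."})
print(features)
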