diff --git a/pyproject.toml b/pyproject.toml
index 04e0c9e67eb2..1965c61e68cd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -74,11 +74,8 @@ exclude = [
 "vllm/distributed/**/*.py" = ["UP006", "UP035"]
 "vllm/engine/**/*.py" = ["UP006", "UP035"]
 "vllm/executor/**/*.py" = ["UP006", "UP035"]
-"vllm/inputs/**/*.py" = ["UP006", "UP035"]
-"vllm/logging_utils/**/*.py" = ["UP006", "UP035"]
 "vllm/lora/**/*.py" = ["UP006", "UP035"]
 "vllm/model_executor/**/*.py" = ["UP006", "UP035"]
-"vllm/multimodal/**/*.py" = ["UP006", "UP035"]
 "vllm/platforms/**/*.py" = ["UP006", "UP035"]
 "vllm/plugins/**/*.py" = ["UP006", "UP035"]
 "vllm/profiler/**/*.py" = ["UP006", "UP035"]
@@ -87,9 +84,7 @@ exclude = [
 "vllm/third_party/**/*.py" = ["UP006", "UP035"]
 "vllm/transformers_utils/**/*.py" = ["UP006", "UP035"]
 "vllm/triton_utils/**/*.py" = ["UP006", "UP035"]
-"vllm/usage/**/*.py" = ["UP006", "UP035"]
 "vllm/vllm_flash_attn/**/*.py" = ["UP006", "UP035"]
-"vllm/assets/**/*.py" = ["UP006", "UP035"]
 "vllm/worker/**/*.py" = ["UP006", "UP035"]
 
 [tool.ruff.lint]
diff --git a/vllm/assets/video.py b/vllm/assets/video.py
index 494cfc38381c..e45e1a65f890 100644
--- a/vllm/assets/video.py
+++ b/vllm/assets/video.py
@@ -2,7 +2,7 @@
 
 from dataclasses import dataclass
 from functools import lru_cache
-from typing import List, Literal
+from typing import Literal
 
 import cv2
 import numpy as np
@@ -58,7 +58,7 @@ def video_to_ndarrays(path: str, num_frames: int = -1) -> npt.NDArray:
 
 
 def video_to_pil_images_list(path: str,
-                             num_frames: int = -1) -> List[Image.Image]:
+                             num_frames: int = -1) -> list[Image.Image]:
     frames = video_to_ndarrays(path, num_frames)
     return [
         Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
@@ -72,7 +72,7 @@ class VideoAsset:
     num_frames: int = -1
 
     @property
-    def pil_images(self) -> List[Image.Image]:
+    def pil_images(self) -> list[Image.Image]:
        video_path = download_video_asset(self.name)
        ret = video_to_pil_images_list(video_path, self.num_frames)
        return ret
diff --git a/vllm/inputs/data.py b/vllm/inputs/data.py
index 2ffebeee392a..138a8f61107b 100644
--- a/vllm/inputs/data.py
+++ b/vllm/inputs/data.py
@@ -1,9 +1,9 @@
 # SPDX-License-Identifier: Apache-2.0
 
+from collections.abc import Iterable
 from dataclasses import dataclass
 from functools import cached_property
-from typing import (TYPE_CHECKING, Any, Dict, Generic, Iterable, List, Literal,
-                    Optional, Tuple, Union, cast)
+from typing import TYPE_CHECKING, Any, Generic, Literal, Optional, Union, cast
 
 import torch
 from typing_extensions import NotRequired, TypedDict, TypeVar, assert_never
@@ -26,7 +26,7 @@ class TextPrompt(TypedDict):
     if the model supports it.
     """
 
-    mm_processor_kwargs: NotRequired[Dict[str, Any]]
+    mm_processor_kwargs: NotRequired[dict[str, Any]]
     """
     Optional multi-modal processor kwargs to be forwarded to the
     multimodal input mapper & processor. Note that if multiple modalities
@@ -38,10 +38,10 @@ class TextPrompt(TypedDict):
 
 class TokensPrompt(TypedDict):
     """Schema for a tokenized prompt."""
 
-    prompt_token_ids: List[int]
+    prompt_token_ids: list[int]
     """A list of token IDs to pass to the model."""
 
-    token_type_ids: NotRequired[List[int]]
+    token_type_ids: NotRequired[list[int]]
     """A list of token type IDs to pass to the cross encoder model."""
 
     multi_modal_data: NotRequired["MultiModalDataDict"]
     """
     Optional multi-modal data to pass to the model,
     if the model supports it.
     """
 
-    mm_processor_kwargs: NotRequired[Dict[str, Any]]
+    mm_processor_kwargs: NotRequired[dict[str, Any]]
     """
     Optional multi-modal processor kwargs to be forwarded to the
     multimodal input mapper & processor. Note that if multiple modalities
@@ -115,7 +115,7 @@ class ExplicitEncoderDecoderPrompt(TypedDict, Generic[_T1_co, _T2_co]):
 
     decoder_prompt: Optional[_T2_co]
 
-    mm_processor_kwargs: NotRequired[Dict[str, Any]]
+    mm_processor_kwargs: NotRequired[dict[str, Any]]
 
 
 PromptType = Union[SingletonPrompt, ExplicitEncoderDecoderPrompt]
@@ -136,10 +136,10 @@ class TokenInputs(TypedDict):
     type: Literal["token"]
     """The type of inputs."""
 
-    prompt_token_ids: List[int]
+    prompt_token_ids: list[int]
     """The token IDs of the prompt."""
 
-    token_type_ids: NotRequired[List[int]]
+    token_type_ids: NotRequired[list[int]]
     """The token type IDs of the prompt."""
 
     prompt: NotRequired[str]
@@ -164,12 +164,12 @@ class TokenInputs(TypedDict):
     Placeholder ranges for the multi-modal data.
     """
 
-    multi_modal_hashes: NotRequired[List[str]]
+    multi_modal_hashes: NotRequired[list[str]]
     """
     The hashes of the multi-modal data.
     """
 
-    mm_processor_kwargs: NotRequired[Dict[str, Any]]
+    mm_processor_kwargs: NotRequired[dict[str, Any]]
     """
     Optional multi-modal processor kwargs to be forwarded to the
     multimodal input mapper & processor. Note that if multiple modalities
@@ -179,14 +179,14 @@ class TokenInputs(TypedDict):
 
 
 def token_inputs(
-    prompt_token_ids: List[int],
-    token_type_ids: Optional[List[int]] = None,
+    prompt_token_ids: list[int],
+    token_type_ids: Optional[list[int]] = None,
     prompt: Optional[str] = None,
     multi_modal_data: Optional["MultiModalDataDict"] = None,
     multi_modal_inputs: Optional["MultiModalKwargs"] = None,
-    multi_modal_hashes: Optional[List[str]] = None,
+    multi_modal_hashes: Optional[list[str]] = None,
     multi_modal_placeholders: Optional["MultiModalPlaceholderDict"] = None,
-    mm_processor_kwargs: Optional[Dict[str, Any]] = None,
+    mm_processor_kwargs: Optional[dict[str, Any]] = None,
 ) -> TokenInputs:
     """Construct :class:`TokenInputs` from optional values."""
     inputs = TokenInputs(type="token", prompt_token_ids=prompt_token_ids)
@@ -255,7 +255,7 @@ def prompt(self) -> Optional[str]:
         assert_never(inputs)  # type: ignore[arg-type]
 
     @cached_property
-    def prompt_token_ids(self) -> List[int]:
+    def prompt_token_ids(self) -> list[int]:
         inputs = self.inputs
 
         if inputs["type"] == "token" or inputs["type"] == "multimodal":
@@ -264,7 +264,7 @@ def prompt_token_ids(self) -> List[int]:
         assert_never(inputs)  # type: ignore[arg-type]
 
     @cached_property
-    def token_type_ids(self) -> List[int]:
+    def token_type_ids(self) -> list[int]:
         inputs = self.inputs
 
         if inputs["type"] == "token" or inputs["type"] == "multimodal":
@@ -294,7 +294,7 @@ def multi_modal_data(self) -> "MultiModalDataDict":
         assert_never(inputs)  # type: ignore[arg-type]
 
     @cached_property
-    def multi_modal_inputs(self) -> Union[Dict, "MultiModalKwargs"]:
+    def multi_modal_inputs(self) -> Union[dict, "MultiModalKwargs"]:
         inputs = self.inputs
 
         if inputs["type"] == "token":
@@ -306,7 +306,7 @@ def multi_modal_inputs(self) -> Union[Dict, "MultiModalKwargs"]:
         assert_never(inputs)  # type: ignore[arg-type]
 
     @cached_property
-    def multi_modal_hashes(self) -> List[str]:
+    def multi_modal_hashes(self) -> list[str]:
         inputs = self.inputs
 
         if inputs["type"] == "token":
@@ -331,7 +331,7 @@ def multi_modal_placeholders(self) -> "MultiModalPlaceholderDict":
         assert_never(inputs)  # type: ignore[arg-type]
 
     @cached_property
-    def mm_processor_kwargs(self) -> Dict[str, Any]:
+    def mm_processor_kwargs(self) -> dict[str, Any]:
         inputs = self.inputs
 
         if inputs["type"] == "token":
@@ -355,7 +355,7 @@ def mm_processor_kwargs(self) -> Dict[str, Any]:
 def build_explicit_enc_dec_prompt(
     encoder_prompt: _T1,
     decoder_prompt: Optional[_T2],
-    mm_processor_kwargs: Optional[Dict[str, Any]] = None,
+    mm_processor_kwargs: Optional[dict[str, Any]] = None,
 ) -> ExplicitEncoderDecoderPrompt[_T1, _T2]:
     if mm_processor_kwargs is None:
         mm_processor_kwargs = {}
@@ -368,9 +368,9 @@ def build_explicit_enc_dec_prompt(
 def zip_enc_dec_prompts(
     enc_prompts: Iterable[_T1],
     dec_prompts: Iterable[Optional[_T2]],
-    mm_processor_kwargs: Optional[Union[Iterable[Dict[str, Any]],
-                                        Dict[str, Any]]] = None,
-) -> List[ExplicitEncoderDecoderPrompt[_T1, _T2]]:
+    mm_processor_kwargs: Optional[Union[Iterable[dict[str, Any]],
+                                        dict[str, Any]]] = None,
+) -> list[ExplicitEncoderDecoderPrompt[_T1, _T2]]:
     """
     Zip encoder and decoder prompts together into a list of
     :class:`ExplicitEncoderDecoderPrompt` instances.
@@ -380,12 +380,12 @@ def zip_enc_dec_prompts(
     provided, it will be zipped with the encoder/decoder prompts.
     """
     if mm_processor_kwargs is None:
-        mm_processor_kwargs = cast(Dict[str, Any], {})
+        mm_processor_kwargs = cast(dict[str, Any], {})
     if isinstance(mm_processor_kwargs, dict):
         return [
             build_explicit_enc_dec_prompt(
                 encoder_prompt, decoder_prompt,
-                cast(Dict[str, Any], mm_processor_kwargs))
+                cast(dict[str, Any], mm_processor_kwargs))
             for (encoder_prompt,
                  decoder_prompt) in zip(enc_prompts, dec_prompts)
         ]
@@ -399,7 +399,7 @@ def zip_enc_dec_prompts(
 
 def to_enc_dec_tuple_list(
     enc_dec_prompts: Iterable[ExplicitEncoderDecoderPrompt[_T1, _T2]],
-) -> List[Tuple[_T1, Optional[_T2]]]:
+) -> list[tuple[_T1, Optional[_T2]]]:
     return [(enc_dec_prompt["encoder_prompt"],
              enc_dec_prompt["decoder_prompt"])
             for enc_dec_prompt in enc_dec_prompts]
diff --git a/vllm/inputs/parse.py b/vllm/inputs/parse.py
index 454d9d8303b7..ed1056948d80 100644
--- a/vllm/inputs/parse.py
+++ b/vllm/inputs/parse.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
-from typing import List, Literal, Sequence, TypedDict, Union, cast, overload
+from collections.abc import Sequence
+from typing import Literal, TypedDict, Union, cast, overload
 
 from typing_extensions import TypeIs
 
@@ -17,24 +18,24 @@ class ParsedText(TypedDict):
 
 
 class ParsedTokens(TypedDict):
-    content: List[int]
+    content: list[int]
     is_tokens: Literal[True]
 
 
 @overload
 def parse_and_batch_prompt(
-        prompt: Union[str, List[str]]) -> Sequence[ParsedText]:
+        prompt: Union[str, list[str]]) -> Sequence[ParsedText]:
     ...
 
 
 @overload
 def parse_and_batch_prompt(
-        prompt: Union[List[int], List[List[int]]]) -> Sequence[ParsedTokens]:
+        prompt: Union[list[int], list[list[int]]]) -> Sequence[ParsedTokens]:
     ...
 
 
 def parse_and_batch_prompt(
-    prompt: Union[str, List[str], List[int], List[List[int]]],
+    prompt: Union[str, list[str], list[int], list[list[int]]],
 ) -> Union[Sequence[ParsedText], Sequence[ParsedTokens]]:
     if isinstance(prompt, str):
         # case 1: a string
@@ -46,16 +47,16 @@ def parse_and_batch_prompt(
 
     if is_list_of(prompt, str):
         # case 2: array of strings
-        prompt = cast(List[str], prompt)
+        prompt = cast(list[str], prompt)
         return [
             ParsedText(content=elem, is_tokens=False) for elem in prompt
         ]
     if is_list_of(prompt, int):
         # case 3: array of tokens
-        prompt = cast(List[int], prompt)
+        prompt = cast(list[int], prompt)
         return [ParsedTokens(content=prompt, is_tokens=True)]
     if is_list_of(prompt, list):
-        prompt = cast(List[List[int]], prompt)
+        prompt = cast(list[list[int]], prompt)
         if len(prompt[0]) == 0:
             raise ValueError("please provide at least one prompt")
 
diff --git a/vllm/inputs/preprocess.py b/vllm/inputs/preprocess.py
index 2545635da320..f56cff292b68 100644
--- a/vllm/inputs/preprocess.py
+++ b/vllm/inputs/preprocess.py
@@ -1,7 +1,8 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import asyncio
-from typing import List, Mapping, Optional, Tuple, Union, cast
+from collections.abc import Mapping
+from typing import Optional, Union, cast
 
 from typing_extensions import assert_never
 
@@ -92,7 +93,7 @@ def get_decoder_start_token_id(self) -> Optional[int]:
 
         return dec_start_token_id
 
-    def _get_default_enc_dec_decoder_prompt(self) -> List[int]:
+    def _get_default_enc_dec_decoder_prompt(self) -> list[int]:
         '''
         Specifically for encoder/decoder models:
         generate a default decoder prompt for when
@@ -130,8 +131,8 @@ def _get_default_enc_dec_decoder_prompt(self) -> List[int]:
 
     def _prepare_decoder_input_ids_for_generation(
         self,
-        decoder_input_ids: Optional[List[int]],
-    ) -> List[int]:
+        decoder_input_ids: Optional[list[int]],
+    ) -> list[int]:
         """
         Prepares `decoder_input_ids` for generation with encoder-decoder
         models.
@@ -168,9 +169,9 @@ def _prepare_decoder_input_ids_for_generation(
 
     def _apply_prompt_adapter(
         self,
-        prompt_token_ids: List[int],
+        prompt_token_ids: list[int],
         prompt_adapter_request: Optional[PromptAdapterRequest],
-    ) -> List[int]:
+    ) -> list[int]:
         if prompt_adapter_request:
             prompt_token_ids = (
                 [0] * prompt_adapter_request.prompt_adapter_num_virtual_tokens
@@ -183,7 +184,7 @@ def _tokenize_prompt(
         prompt: str,
         request_id: str,
         lora_request: Optional[LoRARequest],
-    ) -> List[int]:
+    ) -> list[int]:
         """
         Apply the model's tokenizer to a text prompt, returning the
         corresponding token IDs.
@@ -211,7 +212,7 @@ async def _tokenize_prompt_async(
         prompt: str,
         request_id: str,
         lora_request: Optional[LoRARequest],
-    ) -> List[int]:
+    ) -> list[int]:
         """Async version of :meth:`_tokenize_prompt`."""
         tokenizer = self.get_tokenizer_group()
         add_special_tokens = None
@@ -250,7 +251,7 @@ def _can_process_multimodal(self) -> bool:
 
     def _process_multimodal(
         self,
-        prompt: Union[str, List[int]],
+        prompt: Union[str, list[int]],
         mm_data: MultiModalDataDict,
         mm_processor_kwargs: Optional[Mapping[str, object]],
         lora_request: Optional[LoRARequest],
@@ -280,7 +281,7 @@ def _process_multimodal(
 
     async def _process_multimodal_async(
         self,
-        prompt: Union[str, List[int]],
+        prompt: Union[str, list[int]],
         mm_data: MultiModalDataDict,
         mm_processor_kwargs: Optional[Mapping[str, object]],
         lora_request: Optional[LoRARequest],
@@ -511,7 +512,7 @@ def _separate_enc_dec_inputs_from_mm_processor_outputs(
         self,
         inputs: SingletonInputs,
         decoder_inputs_to_override: Optional[SingletonInputs] = None,
-    ) -> Tuple[SingletonInputs, SingletonInputs]:
+    ) -> tuple[SingletonInputs, SingletonInputs]:
         """
         For encoder/decoder models only:
         Separate Encoder/Decoder inputs from a MultiModalEncDecInputs
diff --git a/vllm/inputs/registry.py b/vllm/inputs/registry.py
index babfc4fb809c..32d7a8b3dd7b 100644
--- a/vllm/inputs/registry.py
+++ b/vllm/inputs/registry.py
@@ -2,9 +2,10 @@
 
 import functools
 from collections import UserDict
+from collections.abc import Mapping
 from dataclasses import dataclass
-from typing import (TYPE_CHECKING, Any, Callable, Mapping, NamedTuple,
-                    Optional, Protocol, Union)
+from typing import (TYPE_CHECKING, Any, Callable, NamedTuple, Optional,
+                    Protocol, Union)
 
 from torch import nn
 from transformers import BatchFeature, PretrainedConfig, ProcessorMixin
diff --git a/vllm/multimodal/base.py b/vllm/multimodal/base.py
index c48d07ba365b..e0b160a65047 100644
--- a/vllm/multimodal/base.py
+++ b/vllm/multimodal/base.py
@@ -2,9 +2,10 @@
 
 from abc import ABC, abstractmethod
 from collections import defaultdict
+from collections.abc import Sequence
 from pathlib import Path
 from typing import (TYPE_CHECKING, Any, Callable, Generic, NamedTuple,
-                    Optional, Sequence, Tuple, Type, TypeVar, Union)
+                    Optional, TypeVar, Union)
 
 from torch import nn
 
@@ -39,7 +40,7 @@
 """
 
 _T = TypeVar("_T")
-N = TypeVar("N", bound=Type[nn.Module])
+N = TypeVar("N", bound=type[nn.Module])
 
 
 class MultiModalPlugin(ABC):
@@ -274,7 +275,7 @@ def __init__(self):
     @classmethod
     def from_seq_group(
         cls, seq_group: "SequenceGroupMetadata", positions: range
-    ) -> Tuple[Optional[MultiModalDataDict], dict[str,
+    ) -> tuple[Optional[MultiModalDataDict], dict[str,
                                                   "MultiModalPlaceholderMap"]]:
         """
         Returns the multi-modal items that intersect with the portion of a
diff --git a/vllm/multimodal/hasher.py b/vllm/multimodal/hasher.py
index 7d277fd67dec..11665ef66753 100644
--- a/vllm/multimodal/hasher.py
+++ b/vllm/multimodal/hasher.py
@@ -1,7 +1,8 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import pickle
-from typing import TYPE_CHECKING, Iterable, Mapping, Optional
+from collections.abc import Iterable, Mapping
+from typing import TYPE_CHECKING, Optional
 
 import numpy as np
 import torch
diff --git a/vllm/multimodal/image.py b/vllm/multimodal/image.py
index 98ece8f806f1..f76982ef8d72 100644
--- a/vllm/multimodal/image.py
+++ b/vllm/multimodal/image.py
@@ -3,7 +3,7 @@
 import base64
 from io import BytesIO
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Dict, Optional
+from typing import TYPE_CHECKING, Any, Optional
 
 import torch
 from PIL import Image
@@ -31,7 +31,7 @@ def get_data_key(self) -> str:
     def _get_hf_image_processor(
         self,
         model_config: "ModelConfig",
-        mm_processor_kwargs: Optional[Dict[str, Any]] = None,
+        mm_processor_kwargs: Optional[dict[str, Any]] = None,
     ):
         if mm_processor_kwargs is None:
             mm_processor_kwargs = {}
diff --git a/vllm/multimodal/registry.py b/vllm/multimodal/registry.py
index 4987cdc4a2e8..febf3ad9eea4 100644
--- a/vllm/multimodal/registry.py
+++ b/vllm/multimodal/registry.py
@@ -2,9 +2,9 @@
 
 import functools
 from collections import UserDict
+from collections.abc import Mapping, Sequence
 from dataclasses import dataclass
-from typing import (TYPE_CHECKING, Any, Dict, Generic, Mapping, Optional,
-                    Protocol, Sequence, Type, TypeVar)
+from typing import TYPE_CHECKING, Any, Generic, Optional, Protocol, TypeVar
 
 import torch.nn as nn
 
@@ -29,7 +29,7 @@
 
 logger = init_logger(__name__)
 
-N = TypeVar("N", bound=Type[nn.Module])
+N = TypeVar("N", bound=type[nn.Module])
 _I = TypeVar("_I", bound=BaseProcessingInfo)
 _I_co = TypeVar("_I_co", bound=BaseProcessingInfo, covariant=True)
 
@@ -83,13 +83,13 @@ def build_processor(
         return self.processor(info, dummy_inputs_builder, cache=cache)
 
 
-class _MultiModalLimits(UserDict["ModelConfig", Dict[str, int]]):
+class _MultiModalLimits(UserDict["ModelConfig", dict[str, int]]):
     """
     Wraps `_limits_by_model` for a more informative error message
     when attempting to access a model that does not exist.
     """
 
-    def __getitem__(self, key: "ModelConfig") -> Dict[str, int]:
+    def __getitem__(self, key: "ModelConfig") -> dict[str, int]:
         try:
             return super().__getitem__(key)
         except KeyError as exc:
@@ -170,7 +170,7 @@ def map_input(
         self,
         model_config: "ModelConfig",
         data: MultiModalDataDict,
-        mm_processor_kwargs: Optional[Dict[str, Any]] = None,
+        mm_processor_kwargs: Optional[dict[str, Any]] = None,
     ) -> MultiModalKwargs:
         """
         Apply an input mapper to the data passed to the model.
@@ -184,7 +184,7 @@ def map_input(
         Note:
             This should be called after :meth:`init_mm_limits_per_prompt`.
         """
-        merged_dict: Dict[str, NestedTensors] = {}
+        merged_dict = dict[str, NestedTensors]()
 
         for data_key, data_value in data.items():
             plugin = self._get_plugin(data_key)
diff --git a/vllm/multimodal/video.py b/vllm/multimodal/video.py
index 8004377191b3..0b3d3f8c79d7 100644
--- a/vllm/multimodal/video.py
+++ b/vllm/multimodal/video.py
@@ -4,7 +4,7 @@
 from functools import partial
 from io import BytesIO
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Dict, Optional
+from typing import TYPE_CHECKING, Any, Optional
 
 import numpy as np
 import numpy.typing as npt
@@ -39,7 +39,7 @@ def get_data_key(self) -> str:
     def _get_hf_video_processor(
         self,
         model_config: "ModelConfig",
-        mm_processor_kwargs: Optional[Dict[str, Any]] = None,
+        mm_processor_kwargs: Optional[dict[str, Any]] = None,
     ):
         if mm_processor_kwargs is None:
             mm_processor_kwargs = {}
diff --git a/vllm/usage/usage_lib.py b/vllm/usage/usage_lib.py
index fbbb21c89370..2ee3f9104d19 100644
--- a/vllm/usage/usage_lib.py
+++ b/vllm/usage/usage_lib.py
@@ -9,7 +9,7 @@
 from enum import Enum
 from pathlib import Path
 from threading import Thread
-from typing import Any, Dict, Optional, Union
+from typing import Any, Optional, Union
 from uuid import uuid4
 
 import cpuinfo
@@ -27,7 +27,7 @@
 
 _USAGE_STATS_ENABLED = None
 _USAGE_STATS_SERVER = envs.VLLM_USAGE_STATS_SERVER
-_GLOBAL_RUNTIME_DATA: Dict[str, Union[str, int, bool]] = {}
+_GLOBAL_RUNTIME_DATA = dict[str, Union[str, int, bool]]()
 
 _USAGE_ENV_VARS_TO_COLLECT = [
     "VLLM_USE_MODELSCOPE",
@@ -150,7 +150,7 @@ def __init__(self) -> None:
     def report_usage(self,
                      model_architecture: str,
                      usage_context: UsageContext,
-                     extra_kvs: Optional[Dict[str, Any]] = None) -> None:
+                     extra_kvs: Optional[dict[str, Any]] = None) -> None:
         t = Thread(target=self._report_usage_worker,
                    args=(model_architecture, usage_context, extra_kvs or {}),
                    daemon=True)
@@ -158,13 +158,13 @@ def report_usage(self,
 
     def _report_usage_worker(self, model_architecture: str,
                              usage_context: UsageContext,
-                             extra_kvs: Dict[str, Any]) -> None:
+                             extra_kvs: dict[str, Any]) -> None:
         self._report_usage_once(model_architecture, usage_context, extra_kvs)
         self._report_continous_usage()
 
     def _report_usage_once(self, model_architecture: str,
                            usage_context: UsageContext,
-                           extra_kvs: Dict[str, Any]) -> None:
+                           extra_kvs: dict[str, Any]) -> None:
         # Platform information
         from vllm.platforms import current_platform
         if current_platform.is_cuda_alike():
@@ -227,7 +227,7 @@ def _report_continous_usage(self):
             self._write_to_file(data)
             self._send_to_server(data)
 
-    def _send_to_server(self, data: Dict[str, Any]) -> None:
+    def _send_to_server(self, data: dict[str, Any]) -> None:
         try:
             global_http_client = global_http_connection.get_sync_client()
             global_http_client.post(_USAGE_STATS_SERVER, json=data)
@@ -235,7 +235,7 @@ def _send_to_server(self, data: Dict[str, Any]) -> None:
             # silently ignore unless we are using debug log
             logging.debug("Failed to send usage data to server")
 
-    def _write_to_file(self, data: Dict[str, Any]) -> None:
+    def _write_to_file(self, data: dict[str, Any]) -> None:
         os.makedirs(os.path.dirname(_USAGE_STATS_JSON_PATH), exist_ok=True)
         Path(_USAGE_STATS_JSON_PATH).touch(exist_ok=True)
         with open(_USAGE_STATS_JSON_PATH, "a") as f: