@@ -40,7 +40,7 @@
 
 from vllm.attention import AttentionMetadata
 from vllm.attention.selector import _Backend
-from vllm.config import CacheConfig, MultiModalConfig
+from vllm.config import CacheConfig, LoRAConfig, MultiModalConfig
 from vllm.distributed import get_pp_group, parallel_state
 from vllm.distributed import utils as dist_utils
 from vllm.inputs import (INPUT_REGISTRY, DecoderOnlyInputs, DummyData,
@@ -65,7 +65,7 @@
 from vllm.transformers_utils.config import uses_mrope
 from vllm.transformers_utils.processor import cached_get_processor
 
-from .interfaces import SupportsMultiModal, SupportsPP
+from .interfaces import SupportsLoRA, SupportsMultiModal, SupportsPP
 from .utils import (PPMissingLayer, get_vit_attn_backend,
                     is_pp_missing_parameter,
                     make_empty_intermediate_tensors_factory)
@@ -927,13 +927,37 @@ def input_processor_for_qwen2_vl(
 @INPUT_REGISTRY.register_dummy_data(dummy_data_for_qwen2_vl)
 @INPUT_REGISTRY.register_input_processor(input_processor_for_qwen2_vl)
 class Qwen2VLForConditionalGeneration(nn.Module, SupportsMultiModal,
-                                      SupportsPP):
+                                      SupportsLoRA, SupportsPP):
+    packed_modules_mapping = {
+        "qkv_proj": [
+            "q_proj",
+            "k_proj",
+            "v_proj",
+        ],
+        "gate_up_proj": [
+            "gate_proj",
+            "up_proj",
+        ],
+    }
+
+    # LoRA specific attributes
+    # TODO Support LoRA for the visual encoder in the future.
+    supported_lora_modules = [
+        "qkv_proj",
+        "o_proj",
+        "gate_up_proj",
+        "down_proj",
+    ]
+    embedding_modules = {}
+    embedding_padding_modules = []
 
     def __init__(self,
                  config: Qwen2VLConfig,
                  multimodal_config: MultiModalConfig,
                  cache_config: Optional[CacheConfig] = None,
-                 quant_config: Optional[QuantizationConfig] = None) -> None:
+                 quant_config: Optional[QuantizationConfig] = None,
+                 lora_config: Optional[LoRAConfig] = None) -> None:
+
         super().__init__()
 
         assert not cache_config.enable_prefix_caching, \
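For reference: `packed_modules_mapping` tells vLLM's LoRA loader which per-projection adapter weights from a HuggingFace-style checkpoint get stacked into the fused layers, so adapters are trained against the unpacked names. A minimal PEFT config targeting the modules supported by this change might look like the sketch below (`r` and `lora_alpha` are illustrative placeholders, not values taken from this commit):

```python
from peft import LoraConfig

# Illustrative adapter config; rank/alpha are placeholders.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=[
        "q_proj", "k_proj", "v_proj",  # stacked into the fused qkv_proj
        "gate_proj", "up_proj",        # stacked into the fused gate_up_proj
        "o_proj", "down_proj",
    ],
    task_type="CAUSAL_LM",
)
```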
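With `SupportsLoRA` in place, an adapter can be applied through vLLM's standard LoRA path. A minimal usage sketch, assuming a hypothetical local adapter at `/path/to/qwen2_vl_lora` (the path and rank are placeholders, and the prompt is text-only for brevity):

```python
from vllm import LLM, SamplingParams
from vllm.lora.request import LoRARequest

# enable_lora activates the SupportsLoRA path added in this change.
llm = LLM(model="Qwen/Qwen2-VL-7B-Instruct",
          enable_lora=True,
          max_lora_rank=16)

outputs = llm.generate(
    ["Describe the picture."],
    SamplingParams(temperature=0.0, max_tokens=64),
    # LoRARequest(name, integer id, local path); the path is a placeholder.
    lora_request=LoRARequest("qwen2-vl-adapter", 1, "/path/to/qwen2_vl_lora"),
)
print(outputs[0].outputs[0].text)
```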