Commit b1767af

mgoin authored and LeiWang1999 committed
[Bugfix] Ignore GPTQ quantization of Qwen2-VL visual module (vllm-project#10169)
Signed-off-by: mgoin <[email protected]>
Signed-off-by: LeiWang1999 <[email protected]>
1 parent 89f8b0b commit b1767af

File tree

1 file changed: 12 additions, 2 deletions

vllm/model_executor/models/qwen2_vl.py

Lines changed: 12 additions & 2 deletions
@@ -51,7 +51,9 @@
 from vllm.model_executor.layers.linear import (ColumnParallelLinear,
                                                RowParallelLinear)
 from vllm.model_executor.layers.logits_processor import LogitsProcessor
-from vllm.model_executor.layers.quantization import QuantizationConfig
+from vllm.model_executor.layers.quantization import (GPTQConfig,
+                                                     GPTQMarlinConfig,
+                                                     QuantizationConfig)
 from vllm.model_executor.layers.sampler import SamplerOutput, get_sampler
 from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
@@ -982,7 +984,7 @@ def __init__(self,
         self.visual = Qwen2VisionTransformer(
             config.vision_config,
             norm_eps=getattr(config, "rms_norm_eps", 1e-6),
-            quant_config=quant_config,
+            quant_config=self._maybe_ignore_quant_config(quant_config),
             prefix="visual",
         )
 
@@ -1008,6 +1010,14 @@ def __init__(self,
             make_empty_intermediate_tensors_factory(
                 ["hidden_states", "residual"], config.hidden_size))
 
+    def _maybe_ignore_quant_config(self, quant_config: QuantizationConfig):
+        # GPTQ configs do not have a list of ignored modules, however AutoGPTQ
+        # seems to avoid vision encoder sections for some models.
+        # See: https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4
+        if isinstance(quant_config, (GPTQConfig, GPTQMarlinConfig)):
+            return None
+        return quant_config
+
     def _validate_and_reshape_mm_tensor(self,
                                         mm_input: Union[torch.Tensor,
                                                         List[torch.Tensor]],
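For context, a minimal usage sketch (not part of the commit): with this change, loading the GPTQ checkpoint referenced in the diff comment builds the vision transformer with quant_config=None, so only the language model layers go through the GPTQ / GPTQ-Marlin path. The prompt and sampling settings below are illustrative assumptions, not taken from the commit.

# Minimal sketch using the vLLM offline LLM API.
from vllm import LLM, SamplingParams

# Checkpoint referenced in the diff comment above; its vision encoder is not
# quantized, so the "visual" module is now constructed with quant_config=None.
llm = LLM(model="Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4")

# Illustrative text-only prompt; multimodal inputs work the same way.
outputs = llm.generate(["Describe the Qwen2-VL model family."],
                       SamplingParams(max_tokens=64))
print(outputs[0].outputs[0].text)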
