ModelCloud · Qubitium · Feb 18, 2025 · Feb 18, 2025
diff --git a/gptqmodel/models/base.py b/gptqmodel/models/base.py
@@ -18,7 +18,6 @@
 
 import json
 import os
-import shutil
 import time
 from typing import Any, Dict, List, Optional, Tuple, Union
 
@@ -28,7 +27,8 @@
 from packaging import version
 from packaging.version import Version
 from tokenicer import Tokenicer
-from transformers import AutoModelForCausalLM, PreTrainedModel, PreTrainedTokenizerBase, modeling_utils
+from transformers import AutoModelForCausalLM, PreTrainedModel, PreTrainedTokenizerBase, modeling_utils, ProcessorMixin, \
+    AutoProcessor
 
 from ..nn_modules.hooked_linear import replace_linear_with_hooked_linear
 from ..nn_modules.qlinear import BaseQuantLinear
@@ -87,6 +87,9 @@ class BaseGPTQModel(nn.Module):
     require_dtype: Optional[str|torch.dtype] = None
     require_fast_init: bool = True
 
+    # Some models need a processor (for example, Qwen2VLImageProcessor); set True to load one via AutoProcessor at init.
+    require_load_processor = False
+
     # TODO: use a better name and what if the value is not at the config root?
     # allow dynamic expert n-count layer extraction
     # so moe model defs do not need to write out 64 layers if expert size is 64 (Qwen2Moe)
@@ -148,6 +151,10 @@ def __init__(
         # stores all per-layer quant stats such as avg loss and processing time
         self.quant_log = []
 
+        self.processor: ProcessorMixin = None
+        if self.require_load_processor:
+            self.processor = AutoProcessor.from_pretrained(model_local_path)
+
         # apply patching of broken trust_remote_code models here
         if self.require_monkeypatch:
             self.monkey_patch()
@@ -952,14 +959,6 @@ def save(
             meta_quantizer: Optional[str] = None,
             **kwargs,
     ):
-        extra_json_file_names = ["preprocessor_config.json"]
-        for name in extra_json_file_names:
-            json_path = os.path.join(self.model_local_path, name)
-            if os.path.exists(json_path):
-                os.makedirs(save_dir, exist_ok=True)
-
-                shutil.copyfile(json_path, os.path.join(save_dir, name))
-
         if self.quantized:
             # Safetensors is unable to save tied weights, so we untie them here. Reference: https://github.com/huggingface/safetensors/issues/202
             #untie_weights(self.model)

diff --git a/gptqmodel/models/definitions/qwen2_vl.py b/gptqmodel/models/definitions/qwen2_vl.py
@@ -45,6 +45,8 @@ class Qwen2VLGPTQ(BaseGPTQModel):
 
     modality = [MODALITY.TEXT, MODALITY.IMAGE_TO_TEXT]
 
+    require_load_processor = True
+
     quant_override_files = {
         "preprocessor_config.json": {
             "do_convert_rgb": True,

diff --git a/gptqmodel/models/writer.py b/gptqmodel/models/writer.py
@@ -314,6 +314,10 @@ def save_quantized(
 
         quantize_config.save_pretrained(save_dir)
 
+        # Save processor-related config files, for example preprocessor_config.json and chat_template.json.
+        if self.processor is not None:
+            self.processor.save_pretrained(save_dir)
+
         # need to copy .py files for model/tokenizers not yet merged to HF transformers
         if self.trust_remote_code:
             copy_py_files(save_dir, model_id_or_path=self.model_local_path)