ModelCloud · Qubitium · Jan 18, 2025 · Jan 18, 2025 · Jan 18, 2025 · Jan 18, 2025
diff --git a/gptqmodel/models/loader.py b/gptqmodel/models/loader.py
@@ -126,6 +126,8 @@ def from_pretrained(
         if quantize_config is None or not isinstance(quantize_config, QuantizeConfig):
             raise AttributeError("`quantize_config` must be passed and be an instance of QuantizeConfig.")
 
+        quantize_config.calculate_bits_per_weight()
+
         if quantize_config.device is not None:
             if device is not None or device_map is not None:
                 raise AttributeError("Passing device and device_map is not allowed when QuantizeConfig.device is set. Non-quantized model is always loaded as cpu. Please set QuantizeConfig.device for accelerator used in quantization or do not set for auto-selection.")
@@ -281,6 +283,8 @@ def from_quantized(
 
         quantize_config = QuantizeConfig.from_pretrained(model_local_path, **cached_file_kwargs, **kwargs)
 
+        quantize_config.calculate_bits_per_weight()
+
         if backend == BACKEND.VLLM or backend == BACKEND.SGLANG:
             if quantize_config.format != FORMAT.GPTQ:
                 raise ValueError(f"{backend} backend only supports FORMAT.GPTQ: actual = {quantize_config.format}")

diff --git a/gptqmodel/quantization/config.py b/gptqmodel/quantization/config.py
@@ -365,6 +365,27 @@ def to_dict(self):
         dict_scale_dtype_to_str(out)
         return out
 
+    def calculate_bits_per_weight(self):
+        """Log and return the estimated effective bits per weight (BPW).
+
+        Each group of ``group_size`` weights is charged ``bits`` per weight
+        plus 32 bits of per-group overhead (2 x 16; presumably the scale and
+        zero point -- confirm against the packing format).
+
+        Returns:
+            The estimate as a float. Nothing on this config is modified.
+        """
+        if self.group_size > 0:
+            bpw = (self.group_size * self.bits + 16 * 2) / self.group_size
+        else:
+            # A non-positive group_size (-1 is the usual GPTQ "no grouping"
+            # value) previously logged a negative BPW, and 0 raised
+            # ZeroDivisionError while loading. Without a group length the
+            # overhead cannot be amortized here, so report the raw bit width.
+            bpw = float(self.bits)
+        logger.info(f"Effective BPW (bits per weight): {bpw} bits")
+        return bpw
+
 @dataclass
 class AutoRoundQuantizeConfig(QuantizeConfig):
     layer_config: dict = field(default_factory=dict)