From 02f7b0fe7c2593c81e652e8c39a27d0fbe1a117d Mon Sep 17 00:00:00 2001
From: "Wang, Chang"
Date: Mon, 27 Nov 2023 17:23:13 +0800
Subject: [PATCH 1/6] Fix smoothquant minmax observer

---
 neural_compressor/adaptor/pytorch.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py
index 4910e960c15..137480de04d 100644
--- a/neural_compressor/adaptor/pytorch.py
+++ b/neural_compressor/adaptor/pytorch.py
@@ -3118,7 +3118,7 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops):
                 from torch.ao.quantization.observer import MinMaxObserver

                 static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
-                    alpha=0.5, act_observer=MinMaxObserver
+                    alpha=0.5, act_observer=MinMaxObserver()
                 )
             else:
                 static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5)
@@ -3308,7 +3308,7 @@ def qdq_quantize(self, model, q_model, tune_cfg, dataloader, q_func):
                 from torch.ao.quantization.observer import MinMaxObserver

                 static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
-                    alpha=0.5, act_observer=MinMaxObserver
+                    alpha=0.5, act_observer=MinMaxObserver()
                 )
             else:
                 static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5)

From 160d75a1398d1188455a63929a04f1adbfbd970a Mon Sep 17 00:00:00 2001
From: "Wang, Chang"
Date: Mon, 27 Nov 2023 17:31:05 +0800
Subject: [PATCH 2/6] Update pytorch.py

---
 neural_compressor/adaptor/pytorch.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py
index 137480de04d..645c490d535 100644
--- a/neural_compressor/adaptor/pytorch.py
+++ b/neural_compressor/adaptor/pytorch.py
@@ -3116,7 +3116,11 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops):
         if not folding:
             if self.sq_minmax_init or self.version.release >= Version("2.1.1").release:
                 from torch.ao.quantization.observer import MinMaxObserver
-
+                static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
+                    alpha=0.5, act_observer=MinMaxObserver
+                )
+            elif self.sq_minmax_init or self.version.release >= Version("2.1.0").release:
+                from torch.ao.quantization.observer import MinMaxObserver
                 static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
                     alpha=0.5, act_observer=MinMaxObserver()
                 )
@@ -3306,7 +3310,11 @@ def qdq_quantize(self, model, q_model, tune_cfg, dataloader, q_func):
         if not hasattr(model._model, "save_qconf_summary") or not hasattr(model._model, "load_qconf_summary"):
             if self.sq_minmax_init or self.version.release >= Version("2.1.1").release:
                 from torch.ao.quantization.observer import MinMaxObserver
-
+                static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
+                    alpha=0.5, act_observer=MinMaxObserver
+                )
+            elif self.sq_minmax_init or self.version.release >= Version("2.1.0").release:
+                from torch.ao.quantization.observer import MinMaxObserver
                 static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
                     alpha=0.5, act_observer=MinMaxObserver()
                 )
             else:
                 static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5)
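
Patches 1 and 2 above hinge on a small API difference: get_smooth_quant_qconfig_mapping takes act_observer as an observer class on newer IPEX (>= 2.1.1) but as an already-constructed observer instance on 2.1.0, which is why patch 1's one-character fix (MinMaxObserver -> MinMaxObserver()) and patch 2's version branch both exist. Below is a minimal standalone sketch of that gate, assuming the class-vs-instance semantics implied by the diffs; the helper name build_sq_qconfig is hypothetical and not part of the patch:

    from packaging.version import Version
    import intel_extension_for_pytorch as ipex
    from torch.ao.quantization.observer import MinMaxObserver

    def build_sq_qconfig(ipex_version: str):
        # Assumption inferred from the diffs: IPEX >= 2.1.1 wants the observer
        # class and instantiates it internally; 2.1.0 wants an instance.
        if Version(ipex_version).release >= Version("2.1.1").release:
            return ipex.quantization.get_smooth_quant_qconfig_mapping(
                alpha=0.5, act_observer=MinMaxObserver
            )
        return ipex.quantization.get_smooth_quant_qconfig_mapping(
            alpha=0.5, act_observer=MinMaxObserver()
        )

    # e.g. qconfig = build_sq_qconfig(ipex.__version__)
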
From adf01c82516fdb2623109cde406b1669c31dbb66 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 27 Nov 2023 09:32:08 +0000
Subject: [PATCH 3/6] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 neural_compressor/adaptor/pytorch.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py
index 645c490d535..58efef50eb2 100644
--- a/neural_compressor/adaptor/pytorch.py
+++ b/neural_compressor/adaptor/pytorch.py
@@ -3116,11 +3116,13 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops):
         if not folding:
             if self.sq_minmax_init or self.version.release >= Version("2.1.1").release:
                 from torch.ao.quantization.observer import MinMaxObserver
+
                 static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
                     alpha=0.5, act_observer=MinMaxObserver
                 )
             elif self.sq_minmax_init or self.version.release >= Version("2.1.0").release:
                 from torch.ao.quantization.observer import MinMaxObserver
+
                 static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
                     alpha=0.5, act_observer=MinMaxObserver()
                 )
@@ -3310,11 +3312,13 @@ def qdq_quantize(self, model, q_model, tune_cfg, dataloader, q_func):
         if not hasattr(model._model, "save_qconf_summary") or not hasattr(model._model, "load_qconf_summary"):
             if self.sq_minmax_init or self.version.release >= Version("2.1.1").release:
                 from torch.ao.quantization.observer import MinMaxObserver
+
                 static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
                     alpha=0.5, act_observer=MinMaxObserver
                 )
             elif self.sq_minmax_init or self.version.release >= Version("2.1.0").release:
                 from torch.ao.quantization.observer import MinMaxObserver
+
                 static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
                     alpha=0.5, act_observer=MinMaxObserver()
                 )
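
Both version checks compare release tuples rather than raw version strings. With packaging.version.Version, the .release attribute keeps only the numeric component, so local build tags such as "+cpu" on IPEX CPU wheels do not disturb the ordering, and 2.1.100 sorts above 2.1.1. A quick self-contained check of that assumption:

    from packaging.version import Version

    # .release drops local segments like "+cpu" and keeps the numeric tuple.
    assert Version("2.1.100+cpu").release == (2, 1, 100)
    # (2, 1, 100) >= (2, 1, 1), so IPEX 2.1.100+cpu passes the "2.1.1" gate.
    assert Version("2.1.100+cpu").release >= Version("2.1.1").release
    assert not (Version("2.1.0").release >= Version("2.1.1").release)
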
From 67dca1f8a5848c14082a5b332929a51e5875e74b Mon Sep 17 00:00:00 2001
From: "Wang, Chang"
Date: Mon, 27 Nov 2023 18:02:27 +0800
Subject: [PATCH 4/6] Update pytorch.py

---
 neural_compressor/adaptor/pytorch.py | 54 +++++++++++++++------------
 1 file changed, 29 insertions(+), 25 deletions(-)

diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py
index 58efef50eb2..0632e2fa611 100644
--- a/neural_compressor/adaptor/pytorch.py
+++ b/neural_compressor/adaptor/pytorch.py
@@ -3114,20 +3114,22 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops):
         smooth_quant_args = self.recipes.get("smooth_quant_args", {})
         folding = smooth_quant_args.get("folding", False)
         if not folding:
-            if self.sq_minmax_init or self.version.release >= Version("2.1.1").release:
-                from torch.ao.quantization.observer import MinMaxObserver
-
-                static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
-                    alpha=0.5, act_observer=MinMaxObserver
-                )
-            elif self.sq_minmax_init or self.version.release >= Version("2.1.0").release:
-                from torch.ao.quantization.observer import MinMaxObserver
-
-                static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
-                    alpha=0.5, act_observer=MinMaxObserver()
-                )
+            from torch.ao.quantization.observer import MinMaxObserver
+
+            if self.version.release >= Version("2.1.1").release:
+                if self.sq_minmax_init:
+                    static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
+                        alpha=0.5, act_observer=MinMaxObserver
+                    )
+                else:
+                    static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5)
             else:
-                static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5)
+                if self.sq_minmax_init:
+                    static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
+                        alpha=0.5, act_observer=MinMaxObserver()
+                    )
+                else:
+                    static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5)
         if self.example_inputs is None:
             self.example_inputs = get_example_inputs(model, self.q_dataloader)
         from neural_compressor.adaptor.torch_utils.util import move_input_device
@@ -3310,20 +3312,22 @@ def qdq_quantize(self, model, q_model, tune_cfg, dataloader, q_func):
         # Check save_qconf_summary part is a workaround for IPEX bug.
         # Sometimes the prepared model from get_op_capablitiy loss this attribute
         if not hasattr(model._model, "save_qconf_summary") or not hasattr(model._model, "load_qconf_summary"):
-            if self.sq_minmax_init or self.version.release >= Version("2.1.1").release:
-                from torch.ao.quantization.observer import MinMaxObserver
-
-                static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
-                    alpha=0.5, act_observer=MinMaxObserver
-                )
-            elif self.sq_minmax_init or self.version.release >= Version("2.1.0").release:
-                from torch.ao.quantization.observer import MinMaxObserver
+            from torch.ao.quantization.observer import MinMaxObserver

-                static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
-                    alpha=0.5, act_observer=MinMaxObserver()
-                )
+            if self.version.release >= Version("2.1.1").release:
+                if self.sq_minmax_init:
+                    static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
+                        alpha=0.5, act_observer=MinMaxObserver
+                    )
+                else:
+                    static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5)
             else:
-                static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5)
+                if self.sq_minmax_init:
+                    static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
+                        alpha=0.5, act_observer=MinMaxObserver()
+                    )
+                else:
+                    static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5)
         if isinstance(self.example_inputs, dict):
             model._model = ipex.quantization.prepare(
                 model._model, static_qconfig, example_kwarg_inputs=self.example_inputs, inplace=inplace

From 86d9d069d1f640b52df758c290eed202398dd2a9 Mon Sep 17 00:00:00 2001
From: "Wang, Chang"
Date: Mon, 27 Nov 2023 18:22:34 +0800
Subject: [PATCH 5/6] Update pytorch.py

---
 neural_compressor/adaptor/pytorch.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py
index 0632e2fa611..7246cca3dc6 100644
--- a/neural_compressor/adaptor/pytorch.py
+++ b/neural_compressor/adaptor/pytorch.py
@@ -3117,17 +3117,18 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops):
             from torch.ao.quantization.observer import MinMaxObserver

             if self.version.release >= Version("2.1.1").release:
-                if self.sq_minmax_init:
-                    static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
+                static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
                         alpha=0.5, act_observer=MinMaxObserver
                     )
-                else:
-                    static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5)
             else:
                 if self.sq_minmax_init:
                     static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
                         alpha=0.5, act_observer=MinMaxObserver()
                     )
+                    logger.warning(
+                        "The int8 model accuracy will be close to 0 with MinMaxobserver, "
+                        + "the suggested IPEX version is higher or equal than 2.1.100."
+                    )
                 else:
                     static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5)
         if self.example_inputs is None:
@@ -3315,17 +3316,18 @@ def qdq_quantize(self, model, q_model, tune_cfg, dataloader, q_func):
             from torch.ao.quantization.observer import MinMaxObserver

             if self.version.release >= Version("2.1.1").release:
-                if self.sq_minmax_init:
-                    static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
+                static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
                         alpha=0.5, act_observer=MinMaxObserver
                     )
-                else:
-                    static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5)
             else:
                 if self.sq_minmax_init:
                     static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
                         alpha=0.5, act_observer=MinMaxObserver()
                     )
+                    logger.warning(
+                        "The int8 model accuracy will be close to 0 with MinMaxobserver, "
+                        + "the suggested IPEX version is higher or equal than 2.1.100+cpu."
+                    )
                 else:
                     static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5)
         if isinstance(self.example_inputs, dict):
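
After patch 5 the behavior settles: IPEX >= 2.1.1 always receives act_observer=MinMaxObserver, while older releases get a MinMaxObserver() instance only when sq_minmax_init is set, together with an accuracy warning (the "2.1.100" suggested in the warning passes the ">= 2.1.1" release-tuple gate checked above). Patch 6 below is purely cosmetic, re-indenting the call arguments that patch 5 left behind. A hedged usage sketch of the resulting qconfig on a recent IPEX follows; the toy model and calibration tensor are stand-ins, and passing example_inputs to prepare() is assumed to be the tensor counterpart of the example_kwarg_inputs keyword visible in the diff:

    import torch
    import intel_extension_for_pytorch as ipex
    from torch.ao.quantization.observer import MinMaxObserver

    # Build the smooth-quant qconfig the same way the >= 2.1.1 branch does.
    qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
        alpha=0.5, act_observer=MinMaxObserver
    )

    # Stand-in model and calibration input, for illustration only.
    model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.ReLU()).eval()
    example_inputs = torch.randn(2, 8)

    # prepare() inserts observers; calibration batches are then run through
    # the prepared model before conversion.
    prepared = ipex.quantization.prepare(
        model, qconfig, example_inputs=example_inputs, inplace=False
    )
    with torch.no_grad():
        prepared(example_inputs)
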
From 266f07f8223eeabd00e836a8c880942126c73b78 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 27 Nov 2023 10:24:21 +0000
Subject: [PATCH 6/6] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 neural_compressor/adaptor/pytorch.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py
index 7246cca3dc6..def044148ca 100644
--- a/neural_compressor/adaptor/pytorch.py
+++ b/neural_compressor/adaptor/pytorch.py
@@ -3118,8 +3118,8 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops):

             if self.version.release >= Version("2.1.1").release:
                 static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
-                        alpha=0.5, act_observer=MinMaxObserver
-                    )
+                    alpha=0.5, act_observer=MinMaxObserver
+                )
             else:
                 if self.sq_minmax_init:
                     static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
@@ -3317,8 +3317,8 @@ def qdq_quantize(self, model, q_model, tune_cfg, dataloader, q_func):

             if self.version.release >= Version("2.1.1").release:
                 static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(