From 02f7b0fe7c2593c81e652e8c39a27d0fbe1a117d Mon Sep 17 00:00:00 2001
From: "Wang, Chang"
Date: Mon, 27 Nov 2023 17:23:13 +0800
Subject: [PATCH 1/6] Fix smoothquant minmax observer

---
 neural_compressor/adaptor/pytorch.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py
index 4910e960c15..137480de04d 100644
--- a/neural_compressor/adaptor/pytorch.py
+++ b/neural_compressor/adaptor/pytorch.py
@@ -3118,7 +3118,7 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops):
                 from torch.ao.quantization.observer import MinMaxObserver

                 static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
-                    alpha=0.5, act_observer=MinMaxObserver
+                    alpha=0.5, act_observer=MinMaxObserver()
                 )
             else:
                 static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5)
@@ -3308,7 +3308,7 @@ def qdq_quantize(self, model, q_model, tune_cfg, dataloader, q_func):
                 from torch.ao.quantization.observer import MinMaxObserver

                 static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
-                    alpha=0.5, act_observer=MinMaxObserver
+                    alpha=0.5, act_observer=MinMaxObserver()
                 )
             else:
                 static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5)

From 160d75a1398d1188455a63929a04f1adbfbd970a Mon Sep 17 00:00:00 2001
From: "Wang, Chang"
Date: Mon, 27 Nov 2023 17:31:05 +0800
Subject: [PATCH 2/6] Update pytorch.py

---
 neural_compressor/adaptor/pytorch.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py
index 137480de04d..645c490d535 100644
--- a/neural_compressor/adaptor/pytorch.py
+++ b/neural_compressor/adaptor/pytorch.py
@@ -3116,7 +3116,11 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops):
         if not folding:
             if self.sq_minmax_init or self.version.release >= Version("2.1.1").release:
                 from torch.ao.quantization.observer import MinMaxObserver
-
+                static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
+                    alpha=0.5, act_observer=MinMaxObserver
+                )
+            elif self.sq_minmax_init or self.version.release >= Version("2.1.0").release:
+                from torch.ao.quantization.observer import MinMaxObserver
                 static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
                     alpha=0.5, act_observer=MinMaxObserver()
                 )
@@ -3306,7 +3310,11 @@ def qdq_quantize(self, model, q_model, tune_cfg, dataloader, q_func):
         if not hasattr(model._model, "save_qconf_summary") or not hasattr(model._model, "load_qconf_summary"):
             if self.sq_minmax_init or self.version.release >= Version("2.1.1").release:
                 from torch.ao.quantization.observer import MinMaxObserver
-
+                static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
+                    alpha=0.5, act_observer=MinMaxObserver
+                )
+            elif self.sq_minmax_init or self.version.release >= Version("2.1.0").release:
+                from torch.ao.quantization.observer import MinMaxObserver
                 static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
                     alpha=0.5, act_observer=MinMaxObserver()
                 )
             else:
                 static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5)
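
Patches 1 and 2 above hinge on a small API difference: get_smooth_quant_qconfig_mapping takes act_observer as an observer class on newer IPEX (>= 2.1.1) but as an already-constructed observer instance on 2.1.0, which is why patch 1's one-character fix (MinMaxObserver -> MinMaxObserver()) and patch 2's version branch both exist. Below is a minimal standalone sketch of that gate, assuming the class-vs-instance semantics implied by the diffs; the helper name build_sq_qconfig is hypothetical and not part of the patch:

    from packaging.version import Version
    import intel_extension_for_pytorch as ipex
    from torch.ao.quantization.observer import MinMaxObserver

    def build_sq_qconfig(ipex_version: str):
        # Assumption inferred from the diffs: IPEX >= 2.1.1 wants the observer
        # class and instantiates it internally; 2.1.0 wants an instance.
        if Version(ipex_version).release >= Version("2.1.1").release:
            return ipex.quantization.get_smooth_quant_qconfig_mapping(
                alpha=0.5, act_observer=MinMaxObserver
            )
        return ipex.quantization.get_smooth_quant_qconfig_mapping(
            alpha=0.5, act_observer=MinMaxObserver()
        )

    # e.g. qconfig = build_sq_qconfig(ipex.__version__)
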
From adf01c82516fdb2623109cde406b1669c31dbb66 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 27 Nov 2023 09:32:08 +0000
Subject: [PATCH 3/6] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 neural_compressor/adaptor/pytorch.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py
index 645c490d535..58efef50eb2 100644
--- a/neural_compressor/adaptor/pytorch.py
+++ b/neural_compressor/adaptor/pytorch.py
@@ -3116,11 +3116,13 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops):
         if not folding:
             if self.sq_minmax_init or self.version.release >= Version("2.1.1").release:
                 from torch.ao.quantization.observer import MinMaxObserver
+
                 static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
                     alpha=0.5, act_observer=MinMaxObserver
                 )
             elif self.sq_minmax_init or self.version.release >= Version("2.1.0").release:
                 from torch.ao.quantization.observer import MinMaxObserver
+
                 static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
                     alpha=0.5, act_observer=MinMaxObserver()
                 )
@@ -3310,11 +3312,13 @@ def qdq_quantize(self, model, q_model, tune_cfg, dataloader, q_func):
         if not hasattr(model._model, "save_qconf_summary") or not hasattr(model._model, "load_qconf_summary"):
             if self.sq_minmax_init or self.version.release >= Version("2.1.1").release:
                 from torch.ao.quantization.observer import MinMaxObserver
+
                 static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
                     alpha=0.5, act_observer=MinMaxObserver
                 )
             elif self.sq_minmax_init or self.version.release >= Version("2.1.0").release:
                 from torch.ao.quantization.observer import MinMaxObserver
+
                 static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
                     alpha=0.5, act_observer=MinMaxObserver()
                 )
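
Both version checks compare release tuples rather than raw version strings. With packaging.version.Version, the .release attribute keeps only the numeric component, so local build tags such as "+cpu" on IPEX CPU wheels do not disturb the ordering, and 2.1.100 sorts above 2.1.1. A quick self-contained check of that assumption:

    from packaging.version import Version

    # .release drops local segments like "+cpu" and keeps the numeric tuple.
    assert Version("2.1.100+cpu").release == (2, 1, 100)
    # (2, 1, 100) >= (2, 1, 1), so IPEX 2.1.100+cpu passes the "2.1.1" gate.
    assert Version("2.1.100+cpu").release >= Version("2.1.1").release
    assert not (Version("2.1.0").release >= Version("2.1.1").release)
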
From 67dca1f8a5848c14082a5b332929a51e5875e74b Mon Sep 17 00:00:00 2001
From: "Wang, Chang"
Date: Mon, 27 Nov 2023 18:02:27 +0800
Subject: [PATCH 4/6] Update pytorch.py

---
 neural_compressor/adaptor/pytorch.py | 54 +++++++++++++++------------
 1 file changed, 29 insertions(+), 25 deletions(-)

diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py
index 58efef50eb2..0632e2fa611 100644
--- a/neural_compressor/adaptor/pytorch.py
+++ b/neural_compressor/adaptor/pytorch.py
@@ -3114,20 +3114,22 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops):
         smooth_quant_args = self.recipes.get("smooth_quant_args", {})
         folding = smooth_quant_args.get("folding", False)
         if not folding:
-            if self.sq_minmax_init or self.version.release >= Version("2.1.1").release:
-                from torch.ao.quantization.observer import MinMaxObserver
-
-                static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
-                    alpha=0.5, act_observer=MinMaxObserver
-                )
-            elif self.sq_minmax_init or self.version.release >= Version("2.1.0").release:
-                from torch.ao.quantization.observer import MinMaxObserver
-
-                static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
-                    alpha=0.5, act_observer=MinMaxObserver()
-                )
+            from torch.ao.quantization.observer import MinMaxObserver
+
+            if self.version.release >= Version("2.1.1").release:
+                if self.sq_minmax_init:
+                    static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
+                        alpha=0.5, act_observer=MinMaxObserver
+                    )
+                else:
+                    static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5)
             else:
-                static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5)
+                if self.sq_minmax_init:
+                    static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
+                        alpha=0.5, act_observer=MinMaxObserver()
+                    )
+                else:
+                    static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5)
         if self.example_inputs is None:
             self.example_inputs = get_example_inputs(model, self.q_dataloader)
         from neural_compressor.adaptor.torch_utils.util import move_input_device
@@ -3310,20 +3312,22 @@ def qdq_quantize(self, model, q_model, tune_cfg, dataloader, q_func):
         # Check save_qconf_summary part is a workaround for IPEX bug.
         # Sometimes the prepared model from get_op_capablitiy loss this attribute
         if not hasattr(model._model, "save_qconf_summary") or not hasattr(model._model, "load_qconf_summary"):
-            if self.sq_minmax_init or self.version.release >= Version("2.1.1").release:
-                from torch.ao.quantization.observer import MinMaxObserver
-
-                static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
-                    alpha=0.5, act_observer=MinMaxObserver
-                )
-            elif self.sq_minmax_init or self.version.release >= Version("2.1.0").release:
-                from torch.ao.quantization.observer import MinMaxObserver
+            from torch.ao.quantization.observer import MinMaxObserver

-                static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
-                    alpha=0.5, act_observer=MinMaxObserver()
-                )
+            if self.version.release >= Version("2.1.1").release:
+                if self.sq_minmax_init:
+                    static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
+                        alpha=0.5, act_observer=MinMaxObserver
+                    )
+                else:
+                    static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5)
             else:
-                static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5)
+                if self.sq_minmax_init:
+                    static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
+                        alpha=0.5, act_observer=MinMaxObserver()
+                    )
+                else:
+                    static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5)
         if isinstance(self.example_inputs, dict):
             model._model = ipex.quantization.prepare(
                 model._model, static_qconfig, example_kwarg_inputs=self.example_inputs, inplace=inplace

From 86d9d069d1f640b52df758c290eed202398dd2a9 Mon Sep 17 00:00:00 2001
From: "Wang, Chang"
Date: Mon, 27 Nov 2023 18:22:34 +0800
Subject: [PATCH 5/6] Update pytorch.py

---
 neural_compressor/adaptor/pytorch.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py
index 0632e2fa611..7246cca3dc6 100644
--- a/neural_compressor/adaptor/pytorch.py
+++ b/neural_compressor/adaptor/pytorch.py
@@ -3117,17 +3117,18 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops):
             from torch.ao.quantization.observer import MinMaxObserver

             if self.version.release >= Version("2.1.1").release:
-                if self.sq_minmax_init:
-                    static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
+                static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
                         alpha=0.5, act_observer=MinMaxObserver
                     )
-                else:
-                    static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5)
             else:
                 if self.sq_minmax_init:
                     static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
                         alpha=0.5, act_observer=MinMaxObserver()
                     )
+                    logger.warning(
+                        "The int8 model accuracy will be close to 0 with MinMaxobserver, "
+                        + "the suggested IPEX version is higher or equal than 2.1.100."
+                    )
                 else:
                     static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5)
         if self.example_inputs is None:
@@ -3315,17 +3316,18 @@ def qdq_quantize(self, model, q_model, tune_cfg, dataloader, q_func):
             from torch.ao.quantization.observer import MinMaxObserver

             if self.version.release >= Version("2.1.1").release:
-                if self.sq_minmax_init:
-                    static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
+                static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
                         alpha=0.5, act_observer=MinMaxObserver
                     )
-                else:
-                    static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5)
             else:
                 if self.sq_minmax_init:
                     static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
                         alpha=0.5, act_observer=MinMaxObserver()
                     )
+                    logger.warning(
+                        "The int8 model accuracy will be close to 0 with MinMaxobserver, "
+                        + "the suggested IPEX version is higher or equal than 2.1.100+cpu."
+                    )
                 else:
                     static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5)
         if isinstance(self.example_inputs, dict):
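
After patch 5 the behavior settles: IPEX >= 2.1.1 always receives act_observer=MinMaxObserver, while older releases get a MinMaxObserver() instance only when sq_minmax_init is set, together with an accuracy warning (the "2.1.100" suggested in the warning passes the ">= 2.1.1" release-tuple gate checked above). Patch 6 below is purely cosmetic, re-indenting the call arguments that patch 5 left behind. A hedged usage sketch of the resulting qconfig on a recent IPEX follows; the toy model and calibration tensor are stand-ins, and passing example_inputs to prepare() is assumed to be the tensor counterpart of the example_kwarg_inputs keyword visible in the diff:

    import torch
    import intel_extension_for_pytorch as ipex
    from torch.ao.quantization.observer import MinMaxObserver

    # Build the smooth-quant qconfig the same way the >= 2.1.1 branch does.
    qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
        alpha=0.5, act_observer=MinMaxObserver
    )

    # Stand-in model and calibration input, for illustration only.
    model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.ReLU()).eval()
    example_inputs = torch.randn(2, 8)

    # prepare() inserts observers; calibration batches are then run through
    # the prepared model before conversion.
    prepared = ipex.quantization.prepare(
        model, qconfig, example_inputs=example_inputs, inplace=False
    )
    with torch.no_grad():
        prepared(example_inputs)
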
From 266f07f8223eeabd00e836a8c880942126c73b78 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 27 Nov 2023 10:24:21 +0000
Subject: [PATCH 6/6] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 neural_compressor/adaptor/pytorch.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py
index 7246cca3dc6..def044148ca 100644
--- a/neural_compressor/adaptor/pytorch.py
+++ b/neural_compressor/adaptor/pytorch.py
@@ -3118,8 +3118,8 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops):

             if self.version.release >= Version("2.1.1").release:
                 static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
-                        alpha=0.5, act_observer=MinMaxObserver
-                    )
+                    alpha=0.5, act_observer=MinMaxObserver
+                )
             else:
                 if self.sq_minmax_init:
                     static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
@@ -3317,8 +3317,8 @@ def qdq_quantize(self, model, q_model, tune_cfg, dataloader, q_func):

             if self.version.release >= Version("2.1.1").release:
                 static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(