Follow AutoRound updates [2.x] (#1652)

Kaihui-intel · web-flow · commit 9a7ddda6f852 · 2024-03-07T09:44:18.000+08:00
Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
diff --git a/.azure-pipelines/scripts/ut/env_setup.sh b/.azure-pipelines/scripts/ut/env_setup.sh
@@ -99,7 +99,7 @@ elif [[ $(echo "${test_case}" | grep -c "tf pruning") != 0 ]]; then
 fi
 
 if [[ $(echo "${test_case}" | grep -c "api") != 0 ]] || [[ $(echo "${test_case}" | grep -c "adaptor") != 0 ]]; then
-    pip install git+https://github.com/intel/auto-round.git@6815f8b66be456ecbef2d0beb33dbc4efeefdc04
+    pip install git+https://github.com/intel/auto-round.git@d02f94d4b085523df3b313863fb07f83b2989cce
 fi
 
 # test deps
diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py
@@ -4913,11 +4913,11 @@ def autoround_quantize(self, model, tune_cfg, dataloader):
             weight_config[op_name]["data_type"] = config["weight"]["dtype"]
             weight_config[op_name]["bits"] = config["weight"]["bits"]
             weight_config[op_name]["group_size"] = config["weight"]["group_size"]
-            weight_config[op_name]["scheme"] = config["weight"]["scheme"]
+            weight_config[op_name]["sym"] = config["weight"]["scheme"] == "sym"
 
         # auto round recipes
         enable_full_range = self.recipes["autoround_args"].get("enable_full_range", False)
-        bs = self.recipes["autoround_args"].get("bs", 8)
+        batch_size = self.recipes["autoround_args"].get("batch_size", 8)
         lr_scheduler = self.recipes["autoround_args"].get("lr_scheduler", None)
         dataset_name = self.recipes["autoround_args"].get("dataset_name", "NeelNanda/pile-10k")
         dataset_split = self.recipes["autoround_args"].get("dataset_split", "train")
@@ -4943,10 +4943,10 @@ def autoround_quantize(self, model, tune_cfg, dataloader):
             tokenizer=None,
             bits=4,
             group_size=128,
-            scheme="asym",
+            sym=False,
             weight_config=weight_config,
             enable_full_range=enable_full_range,
-            bs=bs,
+            batch_size=batch_size,
             lr_scheduler=lr_scheduler,
             dataloader=dataloader,
             dataset_name=dataset_name,
diff --git a/neural_compressor/adaptor/torch_utils/weight_only.py b/neural_compressor/adaptor/torch_utils/weight_only.py
@@ -677,10 +677,10 @@ def autoround_quantize(
     tokenizer,
     bits: int = 4,
     group_size: int = 128,
-    scheme: str = "asym",
+    sym: bool = False,
     weight_config: dict = {},
     enable_full_range: bool = False,  ##for symmetric, TODO support later
-    bs: int = 8,
+    batch_size: int = 8,
     amp: bool = True,
     device=None,
     lr_scheduler=None,
@@ -711,7 +711,7 @@ def autoround_quantize(
     tokenizer: Tokenizer for processing input data. Temporarily set as a mandatory parameter.
     bits (int): Number of bits for quantization (default is 4).
     group_size (int): Size of the quantization group (default is 128).
-    scheme (str): The quantization scheme to be used (default is "asym").
+    sym (bool): Whether symmetric quantization is to be used (default is False).
     weight_config (dict): Configuration for weight quantization (default is an empty dictionary).
     weight_config={
                 'layer1':##layer_name
@@ -758,10 +758,10 @@ def autoround_quantize(
         tokenizer=tokenizer,
         bits=bits,
         group_size=group_size,
-        scheme=scheme,
+        sym=sym,
         weight_config=weight_config,
         enable_full_range=enable_full_range,  ##for symmetric, TODO support later
-        bs=bs,
+        batch_size=batch_size,
         amp=amp,
         device=device,
         lr_scheduler=lr_scheduler,
diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py
@@ -559,9 +559,9 @@ def export_compressed_model(
                 new_module.pack(int_weight, gptq_scale, gptq_zp, m.bias, gptq_perm)
                 set_module(self.model, k, new_module)
         elif autoround_config:
-            from auto_round.export.export_to_itrex.export import _pack_model  # pylint: disable=E0401
+            from auto_round.export.export_to_itrex.export import pack_model  # pylint: disable=E0401
 
-            self.model = _pack_model(
+            self.model = pack_model(
                 self.model,
                 weight_config=autoround_config,
                 enable_full_range=enable_full_range,
diff --git a/test/requirements.txt b/test/requirements.txt
@@ -1,7 +1,7 @@
 --find-links https://download.pytorch.org/whl/torch_stable.html
 accelerate==0.21.0
 dynast==1.6.0rc1
-git+https://github.com/intel/auto-round.git@6815f8b66be456ecbef2d0beb33dbc4efeefdc04
+git+https://github.com/intel/auto-round.git@d02f94d4b085523df3b313863fb07f83b2989cce
 horovod
 intel-extension-for-pytorch
 intel-tensorflow>=2.12.0