Commit 9d41a7d

refine docs, update requirements (#1493)
Signed-off-by: Zhang, Weiwei1 <[email protected]>
1 parent 699d644 commit 9d41a7d

5 files changed: +50, -86 lines

examples/pytorch/nlp/huggingface_models/language-modeling/quantization/autoround/README.md

Lines changed: 16 additions & 11 deletions
@@ -6,20 +6,24 @@ AutoRound is an advanced weight-only quantization algorithm, based on SignRound.
 ## Prerequisites
 - Python 3.9 or higher
 
+
 - The transformers version required varies across different types of models. Here, the transformers version used for running models during experiments is provided as a reference.
 | Model | Transformers version |
 | :----: | :----: |
-| EleutherAI/gpt-j-6b | 4.28/4.30/4.34 |
-| huggyllama/llama-7b | 4.28/4.30/4.34 |
-| meta-llama/Llama-2-7b-hf | 4.30/4.34 |
-| facebook/opt-6.7b | 4.28/4.30/4.34 |
-| tiiuae/falcon-7b | 4.28/4.30/4.34 |
-| mosaicml/mpt-7b | 4.28/4.30/4.34 |
-| bigscience/bloom-7b1 | 4.28/4.30/4.34 |
+| EleutherAI/gpt-j-6b | 4.28/4.30/4.34/4.36 |
+| huggyllama/llama-7b | 4.28/4.30/4.34/4.36 |
+| meta-llama/Llama-2-7b-hf | 4.30/4.34/4.36 |
+| facebook/opt-6.7b | 4.28/4.30/4.34/4.36 |
+| tiiuae/falcon-7b | 4.28/4.30/4.34/4.36 |
+| mosaicml/mpt-7b | 4.28/4.30/4.34/4.36 |
+| bigscience/bloom-7b1 | 4.28/4.30/4.34/4.36 |
 | baichuan-inc/Baichuan-7B | 4.28/4.30 |
-| Qwen/Qwen-7B | 4.28/4.30/4.34 |
-| THUDM/chatglm2-6b | 4.28/4.30 |
-| mistralai/Mistral-7B-v0.1 | 4.34 |
+| Qwen/Qwen-7B | 4.28/4.30/4.34/4.36 |
+| THUDM/chatglm3-6b | 4.34/4.36 |
+| mistralai/Mistral-7B-v0.1 | 4.34/4.36 |
+
+Please note that all experiments in the SignRound+ technical report were conducted using transformers version 4.34.1.
+
 
 
 ## Installation
@@ -42,7 +46,7 @@ Include the flag `--adam`. Note that AdamW may be slightly less effective than S
 
 - **Running the Original SignRound:**
 ```bash
-CUDA_VISIBLE_DEVICES=0 python3 main.py --model_name facebook/opt-125m --amp --num_bits 4 --group_size -1 --iters 400 --lr 0.0025 --minmax_lr 0.0025
+CUDA_VISIBLE_DEVICES=0 python3 main.py --model_name facebook/opt-125m --amp --num_bits 4 --group_size -1 --iters 400 --lr 0.0025 --minmax_lr 0.0025
 ```
 It's recommended to use `--enable_minmax_tuning`.
 
@@ -64,3 +68,4 @@ If you find SignRound useful for your research, please cite our paper:
 year={2023}
 }
 ```
+
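For a quick pre-flight check against the table above, one can compare the installed transformers version with the tested ones. This is an illustrative sketch, not part of the commit; the `TESTED_VERSIONS` set copies the Qwen/Qwen-7B row and would differ per model:

```python
import transformers

# Tested major.minor versions for Qwen/Qwen-7B, per the README table above.
TESTED_VERSIONS = {"4.28", "4.30", "4.34", "4.36"}

major_minor = ".".join(transformers.__version__.split(".")[:2])
if major_minor not in TESTED_VERSIONS:
    print(f"Warning: transformers {transformers.__version__} was not listed as tested "
          f"(tested: {sorted(TESTED_VERSIONS)})")
```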

examples/pytorch/nlp/huggingface_models/language-modeling/quantization/autoround/eval.py

Lines changed: 9 additions & 67 deletions
@@ -7,6 +7,7 @@
 from parse_results import result_parser
 import pprint
 import json
+import re
 import shutil
 import transformers
 import time
@@ -158,12 +159,6 @@ def simple_evaluate(
         + ".db",
     )
 
-    # if isinstance(lm.tokenizer, transformers.LlamaTokenizerFast):
-    #     if lm.tokenizer.pad_token is None:
-    #         lm.tokenizer.add_special_tokens({'pad_token': '[PAD]'})
-    #     else:
-    #         lm.tokenizer.pad_token = '[PAD]'
-
     task_dict = lm_eval.tasks.get_task_dict(tasks)
 
     if check_integrity:
@@ -238,14 +233,6 @@ def eval_model(output_dir=None, model=None, tokenizer=None,
         if each in tasks:
             external_tasks.append(each)
             tasks.remove(each)
-    #
-    # lm = lm_eval.models.get_model("hf-causal-experimental").create_from_arg_string(
-    #     model_args,
-    #     {
-    #         "batch_size": eval_bs,
-    #         "max_batch_size": eval_bs,
-    #         "device": device}
-    # )
 
     results = {}
     model = None
@@ -254,41 +241,23 @@ def eval_model(output_dir=None, model=None, tokenizer=None,
         try:
             num_fewshot = fewshots_dict[mark][tmp_tasks]
             task_names = lm_eval.utils.pattern_match([tmp_tasks], ALL_TASKS)
-            # task_dict = get_task_dict(task_names)
-
-            # for lm-eval internal tasks
             print(f'********* {tmp_tasks} evaluate ************')
            task_s = time.time()
             for shot in num_fewshot:
-                # tmp_results = evaluator.evaluate(
-                #     lm=lm,
-                #     task_dict=task_dict,
-                #     num_fewshot=shot,
-                #     limit=limit,
-                #     bootstrap_iters=100000,
-                #     description_dict=None,
-                #     decontamination_ngrams_path=None,
-                #     write_out=False,
-                #     output_base_path=None,
-                # )
-                # tmp_results, model = simple_evaluate(model="hf-causal", model_args=model_args, tasks=task_names,
-                #                                      num_fewshot=shot, limit=limit,batch_size=eval_bs,max_batch_size=eval_bs)
-
-                model_args = f'pretrained={output_dir},tokenizer="{output_dir}",dtype={dtype},use_accelerate={use_accelerate},trust_remote_code=True'
-                model_type = "hf-causal-experimental"
-                # else:
-                #     model_args = f'pretrained={output_dir},tokenizer="{output_dir}",dtype={dtype}'
-                #     model_type = "hf-causal"
-
+                if bool(re.search("chatglm", output_dir.lower())):
+                    model_args = f'pretrained={output_dir},tokenizer={output_dir},dtype={dtype},trust_remote_code=True'
+                    model_type = "hf-causal"
+                else:
+                    model_args = f'pretrained={output_dir},tokenizer={output_dir},dtype={dtype},use_accelerate={use_accelerate},trust_remote_code=True'
+                    model_type = "hf-causal-experimental"
+
                 if "wikitext" in task_names:
                     tmp_eval_bs = 1
                 else:
                     tmp_eval_bs = eval_bs
-
                 tmp_results, lm = simple_evaluate(model=model_type, model_args=model_args, tasks=task_names,
                                                   num_fewshot=shot, limit=limit, batch_size=tmp_eval_bs,
                                                   max_batch_size=tmp_eval_bs, lm=lm)
-
                 sub_name = f'{tmp_tasks} {shot}-shot'
                 print(f'{sub_name}: ')
                 pprint.pprint(tmp_results["results"])
@@ -299,8 +268,6 @@ def eval_model(output_dir=None, model=None, tokenizer=None,
             print(str(e))
             continue
 
-    # if isinstance(lm.tokenizer, transformers.LlamaTokenizerFast):
-    #     lm.tokenizer = transformers.AutoTokenizer.from_pretrained(output_dir, use_fast=False)
     tokenizer = transformers.AutoTokenizer.from_pretrained(output_dir, use_fast=False, trust_remote_code=True)
     model = lm.model
     # for external tasks
@@ -369,14 +336,9 @@ def eval_model(output_dir=None, model=None, tokenizer=None,
                     new_dict[new_key] = data[sub_key][sub_sub_key]
 
     import pandas as pd
-
     df = pd.DataFrame(data=new_dict, index=[0])
-
     df.to_excel(excel_file)
 
-    # if output_dir == "./tmp_signround":
-    #     shutil.rmtree(output_dir)
-
 
 if __name__ == "__main__":
     import time
@@ -392,35 +354,15 @@ def eval_model(output_dir=None, model=None, tokenizer=None,
 
     args = parser.parse_args()
     s = time.time()
-    # 'wikitext2', 'ptb-new', 'c4-new', 'lambada_openai',
-    # 'hellaswag', 'winogrande', 'piqa', 'coqa', 'drop', 'gsm8k','truthfulqa_mc',
-    # "lambada_openai": [0],
-    # "hellaswag": [0],
-    # "winogrande": [0],
-    # "piqa": [0],
-    # "hendrycksTest-*": [0],
-    # "wikitext": [0],
-    # "truthfulqa_mc": [0],
-    # "openbookqa": [0],
-    # "boolq": [0],
-    # "rte": [0],
-    # "arc_easy": [0],
-    # "arc_challenge": [0],
-
-    test_tasks = [
-        "hendrycksTest-*", 'lambada_openai', "wikitext2", "ptb-new", "c4_new"
-
-    ]
 
     test_tasks = ['wikitext2', 'ptb-new', 'c4-new', 'lambada_openai', 'hellaswag', 'winogrande', 'piqa',
                   "hendrycksTest-*", "wikitext", "truthfulqa_mc", "openbookqa", "boolq", "rte", "arc_easy", "arc_challenge"]
     test_tasks = ['wikitext2', 'ptb-new', 'c4-new', 'lambada_openai', 'hellaswag', 'winogrande', 'piqa',
                   ]
     excel_name = (args.model_name).split('/')[-1] + ".xlsx"
-
-    # test_tasks = ['wikitext2', 'ptb-new', 'c4-new', 'lambada_openai']
     eval_model(output_dir=args.model_name,
                tasks=test_tasks,
                eval_bs=args.bs, eval_orig_float=True, limit=None, excel_file=excel_name)
 
     print("cost time: ", time.time() - s)
+
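The functional change in this diff, beyond deleting commented-out code, is the new backend dispatch inside `eval_model`: checkpoints whose path contains "chatglm" are evaluated with lm-eval's plain `hf-causal` model type and without `use_accelerate`, while all other models keep `hf-causal-experimental`. A standalone sketch of that branch, assuming the same variables as eval.py (the helper name `select_backend` is illustrative, not from the source):

```python
import re

def select_backend(output_dir: str, dtype: str, use_accelerate: bool):
    """Pick the lm-eval model type and model_args string by checkpoint path,
    mirroring the branch added to eval.py."""
    if re.search("chatglm", output_dir.lower()):
        # chatglm checkpoints: plain hf-causal backend, no accelerate flag.
        args = f"pretrained={output_dir},tokenizer={output_dir},dtype={dtype},trust_remote_code=True"
        return "hf-causal", args
    args = (f"pretrained={output_dir},tokenizer={output_dir},dtype={dtype},"
            f"use_accelerate={use_accelerate},trust_remote_code=True")
    return "hf-causal-experimental", args

# Example: select_backend("THUDM/chatglm3-6b", "float16", True)[0] == "hf-causal"
```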

examples/pytorch/nlp/huggingface_models/language-modeling/quantization/autoround/requirements.txt

Lines changed: 8 additions & 2 deletions

@@ -1,11 +1,17 @@
-transformers==4.34.1
+transformers==4.36.0
 torch==2.0.1
 git+https://github.com/EleutherAI/lm-evaluation-harness.git@008fc2a23245c40384f2312718433eeb1e0f87a9
 fsspec==2023.9.2
+tiktoken
+transformers_stream_generator
+peft
+sentencepiece
+einops
+
 ##the following is for intel neural compressor
 schema
 py-cpuinfo
 prettytable
 Pillow
 opencv-python-headless
-pycocotools
+pycocotools

examples/pytorch/nlp/huggingface_models/language-modeling/quantization/signround/README.md

Lines changed: 15 additions & 4 deletions
@@ -3,11 +3,20 @@ This is a sample code for SignRound ([arxiv](https://arxiv.org/abs/2309.05516)),
 ![overview](./overview.png)
 
 
-
 # Prerequisite
-python 3.9 or higher
+-python 3.9 or higher
+
+- The transformers version required varies across different types of models. Here, the transformers version used for running models during experiments is provided as a reference.
+| Model | Transformers version |
+| :----: | :----: |
+| decapoda-research/llama-7b-hf | 4.28 |
+| huggyllama/llama-7b | 4.28/4.30/4.34/4.36 |
+| meta-llama/Llama-2-7b-hf | 4.28/4.30/4.34/4.36 |
+| facebook/opt-6.7b | 4.28/4.30/4.34/4.36 |
+| bigscience/bloom-7b1 | 4.28/4.30/4.34/4.36 |
+
+Please note that all experimental data in the paper is based on transformer version 3.28.1. the huggingface source for llama-7b-hf mentioned in the paper, 'decapoda-research/llama-7b-hf', is currently unavailable. You may opt for 'huggyllama/llama-7b' as an alternative, but please be aware that this replacement might yield slight differences in results.
 
-pip install -r requirements.txt
 
 
 # Run
@@ -24,7 +33,7 @@ CUDA_VISIBLE_DEVICES=0 python3 signround.py --model_name facebook/opt-125m --amp
 ## Known issue
 To address the original lambada evaluation bug in the old version of lm-eval, we have incorporated the lm-eval from intel extension for transformers(ITREX). This discrepancy may lead to certain variations.
 
-To reproduce our results in the paper, please install ITREX
+To reproduce our results in the paper, please install ITREX
 
 ```bash
 pip install intel-extension-for-transformers
@@ -41,3 +50,5 @@ If you find SignRound useful or relevant to your research, please kindly cite ou
 }
 ```
 
+
+
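Because 'decapoda-research/llama-7b-hf' is no longer hosted, the README recommends 'huggyllama/llama-7b' as a stand-in. A minimal loading sketch using standard transformers calls (not part of this commit; expect small deviations from the paper's numbers with the substitute checkpoint):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Substitute for the unavailable decapoda-research/llama-7b-hf,
# as suggested by the README; results may differ slightly.
model_name = "huggyllama/llama-7b"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
model = AutoModelForCausalLM.from_pretrained(model_name)
```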

examples/pytorch/nlp/huggingface_models/language-modeling/quantization/signround/requirements.txt

Lines changed: 2 additions & 2 deletions

@@ -1,5 +1,5 @@
 accelerate
 datasets==2.12.0
 torch==1.13.1
-transformers==4.30.0
-git+https://github.com/EleutherAI/lm-evaluation-harness.git@e81d3cce155e93ba2445068767c738891ad97024
+transformers==4.36.0
+git+https://github.com/EleutherAI/lm-evaluation-harness.git@008fc2a23245c40384f2312718433eeb1e0f87a9
