Commit 9d41a7d

refine docs, update requirements (#1493)
Signed-off-by: Zhang, Weiwei1 <[email protected]>
1 parent 699d644 commit 9d41a7d

5 files changed: +50, -86 lines

examples/pytorch/nlp/huggingface_models/language-modeling/quantization/autoround/README.md

Lines changed: 16 additions & 11 deletions
@@ -6,20 +6,24 @@ AutoRound is an advanced weight-only quantization algorithm, based on SignRound.
 ## Prerequisites
 - Python 3.9 or higher
 
+
 - The transformers version required varies across different types of models. Here, the transformers version used for running models during experiments is provided as a reference.
 | Model | Transformers version |
 | :----: | :----: |
-| EleutherAI/gpt-j-6b | 4.28/4.30/4.34 |
-| huggyllama/llama-7b | 4.28/4.30/4.34 |
-| meta-llama/Llama-2-7b-hf | 4.30/4.34 |
-| facebook/opt-6.7b | 4.28/4.30/4.34 |
-| tiiuae/falcon-7b | 4.28/4.30/4.34 |
-| mosaicml/mpt-7b | 4.28/4.30/4.34 |
-| bigscience/bloom-7b1 | 4.28/4.30/4.34 |
+| EleutherAI/gpt-j-6b | 4.28/4.30/4.34/4.36 |
+| huggyllama/llama-7b | 4.28/4.30/4.34/4.36 |
+| meta-llama/Llama-2-7b-hf | 4.30/4.34/4.36 |
+| facebook/opt-6.7b | 4.28/4.30/4.34/4.36 |
+| tiiuae/falcon-7b | 4.28/4.30/4.34/4.36 |
+| mosaicml/mpt-7b | 4.28/4.30/4.34/4.36 |
+| bigscience/bloom-7b1 | 4.28/4.30/4.34/4.36 |
 | baichuan-inc/Baichuan-7B | 4.28/4.30 |
-| Qwen/Qwen-7B | 4.28/4.30/4.34 |
-| THUDM/chatglm2-6b | 4.28/4.30 |
-| mistralai/Mistral-7B-v0.1 | 4.34 |
+| Qwen/Qwen-7B | 4.28/4.30/4.34/4.36 |
+| THUDM/chatglm3-6b | 4.34/4.36 |
+| mistralai/Mistral-7B-v0.1 | 4.34/4.36 |
+
+Please note that all experiments in the SignRound+ technical report were conducted using transformers version 4.34.1.
+
 
 
 ## Installation
@@ -42,7 +46,7 @@ Include the flag `--adam`. Note that AdamW may be slightly less effective than S
 
 - **Running the Original SignRound:**
 ```bash
-CUDA_VISIBLE_DEVICES=0 python3 main.py --model_name facebook/opt-125m --amp --num_bits 4 --group_size -1 --iters 400 --lr 0.0025 --minmax_lr 0.0025
+CUDA_VISIBLE_DEVICES=0 python3 main.py --model_name facebook/opt-125m --amp --num_bits 4 --group_size -1 --iters 400 --lr 0.0025 --minmax_lr 0.0025
 ```
 It's recommended to use `--enable_minmax_tuning`.
 
@@ -64,3 +68,4 @@ If you find SignRound useful for your research, please cite our paper:
 year={2023}
 }
 ```
+
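For a quick pre-flight check against the table above, one can compare the installed transformers version with the tested ones. This is an illustrative sketch, not part of the commit; the `TESTED_VERSIONS` set copies the Qwen/Qwen-7B row and would differ per model:

```python
import transformers

# Tested major.minor versions for Qwen/Qwen-7B, per the README table above.
TESTED_VERSIONS = {"4.28", "4.30", "4.34", "4.36"}

major_minor = ".".join(transformers.__version__.split(".")[:2])
if major_minor not in TESTED_VERSIONS:
    print(f"Warning: transformers {transformers.__version__} was not listed as tested "
          f"(tested: {sorted(TESTED_VERSIONS)})")
```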

examples/pytorch/nlp/huggingface_models/language-modeling/quantization/autoround/eval.py

Lines changed: 9 additions & 67 deletions
@@ -7,6 +7,7 @@
 from parse_results import result_parser
 import pprint
 import json
+import re
 import shutil
 import transformers
 import time
@@ -158,12 +159,6 @@ def simple_evaluate(
         + ".db",
     )
 
-    # if isinstance(lm.tokenizer, transformers.LlamaTokenizerFast):
-    #     if lm.tokenizer.pad_token is None:
-    #         lm.tokenizer.add_special_tokens({'pad_token': '[PAD]'})
-    #     else:
-    #         lm.tokenizer.pad_token = '[PAD]'
-
     task_dict = lm_eval.tasks.get_task_dict(tasks)
 
     if check_integrity:
@@ -238,14 +233,6 @@ def eval_model(output_dir=None, model=None, tokenizer=None,
         if each in tasks:
             external_tasks.append(each)
             tasks.remove(each)
-    #
-    # lm = lm_eval.models.get_model("hf-causal-experimental").create_from_arg_string(
-    #     model_args,
-    #     {
-    #         "batch_size": eval_bs,
-    #         "max_batch_size": eval_bs,
-    #         "device": device}
-    # )
 
     results = {}
     model = None
@@ -254,41 +241,23 @@ def eval_model(output_dir=None, model=None, tokenizer=None,
         try:
             num_fewshot = fewshots_dict[mark][tmp_tasks]
             task_names = lm_eval.utils.pattern_match([tmp_tasks], ALL_TASKS)
-            # task_dict = get_task_dict(task_names)
-
-            # for lm-eval internal tasks
             print(f'********* {tmp_tasks} evaluate ************')
            task_s = time.time()
             for shot in num_fewshot:
-                # tmp_results = evaluator.evaluate(
-                #     lm=lm,
-                #     task_dict=task_dict,
-                #     num_fewshot=shot,
-                #     limit=limit,
-                #     bootstrap_iters=100000,
-                #     description_dict=None,
-                #     decontamination_ngrams_path=None,
-                #     write_out=False,
-                #     output_base_path=None,
-                # )
-                # tmp_results, model = simple_evaluate(model="hf-causal", model_args=model_args, tasks=task_names,
-                #                                      num_fewshot=shot, limit=limit,batch_size=eval_bs,max_batch_size=eval_bs)
-
-                model_args = f'pretrained={output_dir},tokenizer="{output_dir}",dtype={dtype},use_accelerate={use_accelerate},trust_remote_code=True'
-                model_type = "hf-causal-experimental"
-                # else:
-                #     model_args = f'pretrained={output_dir},tokenizer="{output_dir}",dtype={dtype}'
-                #     model_type = "hf-causal"
-
+                if bool(re.search("chatglm", output_dir.lower())):
+                    model_args = f'pretrained={output_dir},tokenizer={output_dir},dtype={dtype},trust_remote_code=True'
+                    model_type = "hf-causal"
+                else:
+                    model_args = f'pretrained={output_dir},tokenizer={output_dir},dtype={dtype},use_accelerate={use_accelerate},trust_remote_code=True'
+                    model_type = "hf-causal-experimental"
+
                 if "wikitext" in task_names:
                     tmp_eval_bs = 1
                 else:
                     tmp_eval_bs = eval_bs
-
                 tmp_results, lm = simple_evaluate(model=model_type, model_args=model_args, tasks=task_names,
                                                   num_fewshot=shot, limit=limit, batch_size=tmp_eval_bs,
                                                   max_batch_size=tmp_eval_bs, lm=lm)
-
                 sub_name = f'{tmp_tasks} {shot}-shot'
                 print(f'{sub_name}: ')
                 pprint.pprint(tmp_results["results"])
@@ -299,8 +268,6 @@ def eval_model(output_dir=None, model=None, tokenizer=None,
             print(str(e))
             continue
 
-    # if isinstance(lm.tokenizer, transformers.LlamaTokenizerFast):
-    #     lm.tokenizer = transformers.AutoTokenizer.from_pretrained(output_dir, use_fast=False)
     tokenizer = transformers.AutoTokenizer.from_pretrained(output_dir, use_fast=False, trust_remote_code=True)
     model = lm.model
     # for external tasks
@@ -369,14 +336,9 @@ def eval_model(output_dir=None, model=None, tokenizer=None,
                     new_dict[new_key] = data[sub_key][sub_sub_key]
 
     import pandas as pd
-
     df = pd.DataFrame(data=new_dict, index=[0])
-
     df.to_excel(excel_file)
 
-    # if output_dir == "./tmp_signround":
-    #     shutil.rmtree(output_dir)
-
 
 if __name__ == "__main__":
     import time
@@ -392,35 +354,15 @@ def eval_model(output_dir=None, model=None, tokenizer=None,
 
     args = parser.parse_args()
     s = time.time()
-    # 'wikitext2', 'ptb-new', 'c4-new', 'lambada_openai',
-    # 'hellaswag', 'winogrande', 'piqa', 'coqa', 'drop', 'gsm8k','truthfulqa_mc',
-    # "lambada_openai": [0],
-    # "hellaswag": [0],
-    # "winogrande": [0],
-    # "piqa": [0],
-    # "hendrycksTest-*": [0],
-    # "wikitext": [0],
-    # "truthfulqa_mc": [0],
-    # "openbookqa": [0],
-    # "boolq": [0],
-    # "rte": [0],
-    # "arc_easy": [0],
-    # "arc_challenge": [0],
-
-    test_tasks = [
-        "hendrycksTest-*", 'lambada_openai', "wikitext2", "ptb-new", "c4_new"
-
-    ]
 
     test_tasks = ['wikitext2', 'ptb-new', 'c4-new', 'lambada_openai', 'hellaswag', 'winogrande', 'piqa',
                   "hendrycksTest-*", "wikitext", "truthfulqa_mc", "openbookqa", "boolq", "rte", "arc_easy", "arc_challenge"]
     test_tasks = ['wikitext2', 'ptb-new', 'c4-new', 'lambada_openai', 'hellaswag', 'winogrande', 'piqa',
                   ]
     excel_name = (args.model_name).split('/')[-1] + ".xlsx"
-
-    # test_tasks = ['wikitext2', 'ptb-new', 'c4-new', 'lambada_openai']
     eval_model(output_dir=args.model_name,
                tasks=test_tasks,
                eval_bs=args.bs, eval_orig_float=True, limit=None, excel_file=excel_name)
 
     print("cost time: ", time.time() - s)
+
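The functional change in this diff, beyond deleting commented-out code, is the new backend dispatch inside `eval_model`: checkpoints whose path contains "chatglm" are evaluated with lm-eval's plain `hf-causal` model type and without `use_accelerate`, while all other models keep `hf-causal-experimental`. A standalone sketch of that branch, assuming the same variables as eval.py (the helper name `select_backend` is illustrative, not from the source):

```python
import re

def select_backend(output_dir: str, dtype: str, use_accelerate: bool):
    """Pick the lm-eval model type and model_args string by checkpoint path,
    mirroring the branch added to eval.py."""
    if re.search("chatglm", output_dir.lower()):
        # chatglm checkpoints: plain hf-causal backend, no accelerate flag.
        args = f"pretrained={output_dir},tokenizer={output_dir},dtype={dtype},trust_remote_code=True"
        return "hf-causal", args
    args = (f"pretrained={output_dir},tokenizer={output_dir},dtype={dtype},"
            f"use_accelerate={use_accelerate},trust_remote_code=True")
    return "hf-causal-experimental", args

# Example: select_backend("THUDM/chatglm3-6b", "float16", True)[0] == "hf-causal"
```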

examples/pytorch/nlp/huggingface_models/language-modeling/quantization/autoround/requirements.txt

Lines changed: 8 additions & 2 deletions

@@ -1,11 +1,17 @@
-transformers==4.34.1
+transformers==4.36.0
 torch==2.0.1
 git+https://github.com/EleutherAI/lm-evaluation-harness.git@008fc2a23245c40384f2312718433eeb1e0f87a9
 fsspec==2023.9.2
+tiktoken
+transformers_stream_generator
+peft
+sentencepiece
+einops
+
 ##the following is for intel neural compressor
 schema
 py-cpuinfo
 prettytable
 Pillow
 opencv-python-headless
-pycocotools
+pycocotools

examples/pytorch/nlp/huggingface_models/language-modeling/quantization/signround/README.md

Lines changed: 15 additions & 4 deletions
@@ -3,11 +3,20 @@ This is a sample code for SignRound ([arxiv](https://arxiv.org/abs/2309.05516)),
 ![overview](./overview.png)
 
 
-
 # Prerequisite
-python 3.9 or higher
+-python 3.9 or higher
+
+- The transformers version required varies across different types of models. Here, the transformers version used for running models during experiments is provided as a reference.
+| Model | Transformers version |
+| :----: | :----: |
+| decapoda-research/llama-7b-hf | 4.28 |
+| huggyllama/llama-7b | 4.28/4.30/4.34/4.36 |
+| meta-llama/Llama-2-7b-hf | 4.28/4.30/4.34/4.36 |
+| facebook/opt-6.7b | 4.28/4.30/4.34/4.36 |
+| bigscience/bloom-7b1 | 4.28/4.30/4.34/4.36 |
+
+Please note that all experimental data in the paper is based on transformer version 3.28.1. the huggingface source for llama-7b-hf mentioned in the paper, 'decapoda-research/llama-7b-hf', is currently unavailable. You may opt for 'huggyllama/llama-7b' as an alternative, but please be aware that this replacement might yield slight differences in results.
 
-pip install -r requirements.txt
 
 
 # Run
@@ -24,7 +33,7 @@ CUDA_VISIBLE_DEVICES=0 python3 signround.py --model_name facebook/opt-125m --amp
 ## Known issue
 To address the original lambada evaluation bug in the old version of lm-eval, we have incorporated the lm-eval from intel extension for transformers(ITREX). This discrepancy may lead to certain variations.
 
-To reproduce our results in the paper, please install ITREX
+To reproduce our results in the paper, please install ITREX
 
 ```bash
 pip install intel-extension-for-transformers
@@ -41,3 +50,5 @@ If you find SignRound useful or relevant to your research, please kindly cite ou
 }
 ```
 
+
+
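Because 'decapoda-research/llama-7b-hf' is no longer hosted, the README recommends 'huggyllama/llama-7b' as a stand-in. A minimal loading sketch using standard transformers calls (not part of this commit; expect small deviations from the paper's numbers with the substitute checkpoint):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Substitute for the unavailable decapoda-research/llama-7b-hf,
# as suggested by the README; results may differ slightly.
model_name = "huggyllama/llama-7b"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
model = AutoModelForCausalLM.from_pretrained(model_name)
```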

examples/pytorch/nlp/huggingface_models/language-modeling/quantization/signround/requirements.txt

Lines changed: 2 additions & 2 deletions

@@ -1,5 +1,5 @@
 accelerate
 datasets==2.12.0
 torch==1.13.1
-transformers==4.30.0
-git+https://github.com/EleutherAI/lm-evaluation-harness.git@e81d3cce155e93ba2445068767c738891ad97024
+transformers==4.36.0
+git+https://github.com/EleutherAI/lm-evaluation-harness.git@008fc2a23245c40384f2312718433eeb1e0f87a9
