Skip to content

Commit b35ff8f

Browse files
Authored by violetch24
example update for 3.x ipex sq (#1902)
Signed-off-by: violetch24 <[email protected]>
1 parent 000946f commit b35ff8f

File tree

7 files changed

+49
-30
lines changed

7 files changed

+49
-30
lines changed

examples/.config/model_params_pytorch_3x.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@
8585
"batch_size": 8
8686
},
8787
"gpt_j_ipex":{
88-
"model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant",
88+
"model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant/ipex",
8989
"dataset_location": "",
9090
"input_model": "",
9191
"main_script": "run_clm_no_trainer.py",
@@ -99,7 +99,7 @@
9999
"batch_size": 1
100100
},
101101
"llama2_7b_ipex":{
102-
"model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant",
102+
"model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant/ipex",
103103
"dataset_location": "",
104104
"input_model": "",
105105
"main_script": "run_clm_no_trainer.py",
@@ -113,7 +113,7 @@
113113
"batch_size": 1
114114
},
115115
"opt_125m_ipex":{
116-
"model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant",
116+
"model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant/ipex",
117117
"dataset_location": "",
118118
"input_model": "",
119119
"main_script": "run_clm_no_trainer.py",

examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,4 @@ neural-compressor
1111
intel-extension-for-transformers
1212
lm_eval==0.4.2
1313
peft
14+
optimum-intel

examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/run_clm_no_trainer.py

Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -162,15 +162,6 @@ def get_user_model():
162162
collate_fn=calib_evaluator.collate_batch,
163163
)
164164

165-
from neural_compressor.torch.quantization import SmoothQuantConfig
166-
167-
args.alpha = eval(args.alpha)
168-
excluded_precisions = [] if args.int8_bf16_mixed else ["bf16"]
169-
quant_config = SmoothQuantConfig(alpha=args.alpha, folding=False, excluded_precisions=excluded_precisions)
170-
171-
if re.search("gpt", user_model.config.model_type):
172-
quant_config.set_local(torch.add, SmoothQuantConfig(w_dtype="fp32", act_dtype="fp32"))
173-
174165
from neural_compressor.torch.algorithms.smooth_quant import move_input_to_device
175166
from tqdm import tqdm
176167

@@ -189,16 +180,39 @@ def run_fn(model):
189180
if calib_iter >= args.calib_iters:
190181
break
191182
return
192-
183+
184+
def eval_func(model):
185+
config = AutoConfig.from_pretrained(args.model)
186+
setattr(model, "config", config)
187+
188+
from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
189+
eval_args = LMEvalParser(
190+
model="hf",
191+
user_model=model,
192+
tokenizer=tokenizer,
193+
batch_size=args.batch_size,
194+
tasks=args.tasks,
195+
device="cpu",
196+
)
197+
results = evaluate(eval_args)
198+
if args.tasks == "wikitext":
199+
return results["results"][args.tasks]["word_perplexity,none"]
200+
else:
201+
return results["results"][args.tasks]["acc,none"]
202+
193203
from utils import get_example_inputs
194204

195205
example_inputs = get_example_inputs(user_model, calib_dataloader)
196206

197-
from neural_compressor.torch.quantization import prepare, convert
198-
199-
user_model = prepare(model=user_model, quant_config=quant_config, example_inputs=example_inputs)
200-
run_fn(user_model)
201-
user_model = convert(user_model)
207+
from neural_compressor.torch.quantization import SmoothQuantConfig, autotune, TuningConfig
208+
tune_config = TuningConfig(config_set=SmoothQuantConfig.get_config_set_for_tuning())
209+
user_model = autotune(
210+
user_model,
211+
tune_config=tune_config,
212+
eval_fn=eval_func,
213+
run_fn=run_fn,
214+
example_inputs=example_inputs,
215+
)
202216
user_model.save(args.output_dir)
203217

204218

@@ -231,11 +245,10 @@ def run_fn(model):
231245
results = evaluate(eval_args)
232246
for task_name in args.tasks.split(","):
233247
if task_name == "wikitext":
234-
acc = results["results"][task_name]["word_perplexity,none"]
248+
print("Accuracy for %s is: %s" % (task_name, results["results"][task_name]["word_perplexity,none"]))
235249
else:
236-
acc = results["results"][task_name]["acc,none"]
237-
print("Accuracy: %.5f" % acc)
238-
print("Batch size = %d" % args.batch_size)
250+
print("Accuracy for %s is: %s" % (task_name, results["results"][task_name]["acc,none"]))
251+
239252

240253
if args.performance:
241254
user_model.eval()

examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/run_clm_no_trainer.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -164,9 +164,9 @@ def get_user_model():
164164
)
165165

166166

167-
from neural_compressor.torch.quantization import get_default_static_config, StaticQuantConfig
168-
quant_config = get_default_static_config()
169-
quant_config.excluded_precisions = [] if args.int8_bf16_mixed else ["bf16"]
167+
from neural_compressor.torch.quantization import StaticQuantConfig
168+
excluded_precisions = [] if args.int8_bf16_mixed else ["bf16"]
169+
quant_config = StaticQuantConfig(excluded_precisions=excluded_precisions)
170170
if re.search("gpt", user_model.config.model_type):
171171
quant_config.set_local("add", StaticQuantConfig(w_dtype="fp32", act_dtype="fp32"))
172172

neural_compressor/torch/quantization/autotune.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def autotune(
8181
best_quant_model = None
8282
eval_func_wrapper = EvaluationFuncWrapper(eval_fn, eval_args)
8383
config_loader, tuning_logger, tuning_monitor = init_tuning(tuning_config=tune_config)
84-
baseline: float = eval_func_wrapper.evaluate(model)
84+
baseline: float = eval_func_wrapper.evaluate(deepcopy(model))
8585
tuning_monitor.set_baseline(baseline)
8686
tuning_logger.tuning_start()
8787
for trial_index, quant_config in enumerate(config_loader, 1):

neural_compressor/torch/quantization/config.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1582,8 +1582,14 @@ def get_model_info(self, model: torch.nn.Module, example_inputs) -> List[Tuple[s
15821582

15831583
@classmethod
15841584
def get_config_set_for_tuning(cls) -> Union[None, "SmoothQuantConfig", List["SmoothQuantConfig"]]:
1585-
"""Get the default configuration set for tuning."""
1586-
return SmoothQuantConfig(alpha=[0.1, 0.5], folding=[True, False], scale_sharing=[True, False])
1585+
import numpy as np
1586+
1587+
return SmoothQuantConfig(
1588+
alpha=np.arange(0.1, 1.0, 0.1).tolist(),
1589+
folding=[True, False],
1590+
scale_sharing=[True, False],
1591+
excluded_precisions=[["bf16"]],
1592+
)
15871593

15881594

15891595
def get_default_sq_config() -> SmoothQuantConfig:

test/3x/torch/quantization/test_static_quant.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ def test_static_quant_with_quantize_API(self):
216216
def test_static_quant_mixed_precision(self):
217217
fp32_model = copy.deepcopy(self.fp32_model)
218218
example_inputs = self.input
219-
quant_config = get_default_static_config()
219+
quant_config = StaticQuantConfig(excluded_precisions=["bf16"])
220220
prepared_model = prepare(fp32_model, quant_config=quant_config, example_inputs=example_inputs)
221221
run_fn(prepared_model)
222222
q_model = convert(prepared_model)
@@ -229,7 +229,6 @@ def test_static_quant_mixed_precision(self):
229229
q_model = convert(prepared_model)
230230
assert q_model is not None, "Quantization failed!"
231231

232-
quant_config.excluded_precisions = ["bf16"]
233232
prepared_model = prepare(fp32_model, quant_config=quant_config, example_inputs=example_inputs)
234233
run_fn(prepared_model)
235234
q_model = convert(prepared_model)

0 commit comments

Comments (0)