 from tabulate import tabulate  # noqa: E402
 
 
-def bench(path: str, backend: BACKEND, adapter: Optional[Lora]):
-    # test post-quant inference
-    model = GPTQModel.load(
-        model_id_or_path=path,
-        backend=backend,
-        adapter=adapter,
-    )
-
-    tokens = model.generate("Capital of France is")[0]
-    result = model.tokenizer.decode(tokens)
-    print(f"BACKEND: {backend}, Result: {result}")
-    assert "paris" in result.lower(), f"`paris` not found in `{result}`"
-
-    bench_result = GPTQModel.eval(
-        model_or_id_or_path=model,
-        framework=EVAL.LM_EVAL,
-        tasks=[EVAL.LM_EVAL.ARC_CHALLENGE, EVAL.LM_EVAL.MMLU],
-        batch_size=32,
-    )
-
-    del model
-    torch_empty_cache()
-
-    return bench_result
 
 class Test(ModelTest):
     NATIVE_MODEL_ID = "/monster/data/model/Qwen2.5-0.5B-Instruct/"
@@ -140,8 +116,8 @@ def test_quant_and_eora(self):
 
         # BACKEND.EXLLAMA_V2, BACKEND.EXLLAMA_V1, BACKEND.TRITON, BACKEND.CUDA,
         for backend in [BACKEND.MARLIN]:  # BACKEND.IPEX, BACKEND.BITBLAS, BACKEND.EXLLAMA_V2V BACKEND.MARLIN
-            base_bench = bench(path=tmpdir, backend=backend, adapter=None)  # inference using qweights only
-            eora_bench = bench(path=tmpdir, backend=backend, adapter=eora)  # inference using eora (lora)
+            base_bench = self.bench(path=tmpdir, backend=backend, adapter=None)  # inference using qweights only
+            eora_bench = self.bench(path=tmpdir, backend=backend, adapter=eora)  # inference using eora (lora)
 
             print('--------GPTQModel + EoRA Config ---------')
 
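For context, the `eora` adapter threaded through `self.bench` here is constructed earlier in the test, outside this hunk. A minimal sketch of that setup, assuming GPTQModel's `Lora` adapter is built from the saved EoRA weights path and a rank (the path variable and rank value below are illustrative, not taken from this diff):

```python
from gptqmodel.adapter.adapter import Lora

# Illustrative only: the real test builds `eora` before the loop,
# pointing at the EoRA weights produced during quantization.
eora = Lora(
    path=eora_weights_dir,  # hypothetical variable holding the adapter path
    rank=128,               # illustrative rank
)
```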
@@ -158,3 +134,28 @@ def test_quant_and_eora(self):
             print(make_table(eora_bench))
             if "groups" in eora_bench:
                 print(make_table(eora_bench, "groups"))
+
+    def bench(self, path: str, backend: BACKEND, adapter: Optional[Lora]):
+        # test post-quant inference
+        model = GPTQModel.load(
+            model_id_or_path=path,
+            backend=backend,
+            adapter=adapter,
+        )
+
+        tokens = model.generate("Capital of France is")[0]
+        result = model.tokenizer.decode(tokens)
+        print(f"BACKEND: {backend}, Result: {result}")
+        assert "paris" in result.lower(), f"`paris` not found in `{result}`"
+
+        bench_result = GPTQModel.eval(
+            model_or_id_or_path=model,
+            framework=EVAL.LM_EVAL,
+            tasks=[EVAL.LM_EVAL.ARC_CHALLENGE, EVAL.LM_EVAL.MMLU],
+            batch_size=self.get_batch_size(),
+        )
+
+        del model
+        torch_empty_cache()
+
+        return bench_result
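Besides moving `bench` onto the test class, the other substantive change is `batch_size=self.get_batch_size()` replacing the hardcoded `32`; the helper itself is not part of this diff. A minimal sketch of what such a method might look like on the `ModelTest` base class, assuming it sizes the eval batch to free GPU memory (the threshold and return values are illustrative):

```python
import torch

class ModelTest:
    def get_batch_size(self) -> int:
        # Hypothetical helper, not shown in this diff: use a large eval
        # batch when plenty of GPU memory is free, otherwise fall back
        # to a conservative default that fits smaller cards.
        if torch.cuda.is_available():
            free_bytes, _total = torch.cuda.mem_get_info()
            if free_bytes > 40 * 1024**3:  # > 40 GiB free (illustrative)
                return 32
        return 8
```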