@@ -19,6 +19,7 @@
 import os

 from lm_eval.utils import make_table
+from tokenicer import Tokenicer

 if not os.environ.get("PYTORCH_CUDA_ALLOC_CONF", None):
     os.environ["PYTORCH_CUDA_ALLOC_CONF"] = 'expandable_segments:True'
@@ -42,7 +43,7 @@
 import numpy  # noqa: E402
 import torch  # noqa: E402
 from huggingface_hub import list_repo_files  # noqa: E402
-from transformers import AutoConfig, AutoTokenizer  # noqa: E402
+from transformers import AutoConfig, PreTrainedModel, PreTrainedTokenizerBase  # noqa: E402

 from ..quantization import QUANT_CONFIG_FILENAME  # noqa: E402
 from ..utils import BACKEND  # noqa: E402
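The import changes above swap direct AutoTokenizer usage for ModelCloud's Tokenicer wrapper and bring in the transformers base classes used for type checks later in the diff. A minimal sketch of the new loading path; the only API assumed is the Tokenicer.load(model_id_or_path) call that appears below, and the model id is a placeholder:

from tokenicer import Tokenicer

tokenizer = Tokenicer.load("facebook/opt-125m")  # placeholder model id
# The wrapper stands in for an HF tokenizer, so attributes such as
# chat_template (consulted by eval() below) resolve as usual.
print(tokenizer.chat_template is not None)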
@@ -286,7 +287,8 @@ def from_quantized(
     def eval(
         cls,
         model_or_id_or_path: str = None,
-        tasks: Union[List[EVAL.LM_EVAL], List[EVAL.EVALPLUS]] = None,  # set to None to tifx mutable warning
+        tokenizer: PreTrainedTokenizerBase = None,
+        tasks: Union[List[EVAL.LM_EVAL], List[EVAL.EVALPLUS]] = None,  # set to None to fix mutable warning
         framework: EVAL = EVAL.LM_EVAL,
         batch_size: int = 1,
         trust_remote_code: bool = False,
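The signature gains an explicit tokenizer parameter, so callers can supply their own instead of relying on auto-resolution. A hedged usage sketch; the GPTQModel.eval entry point and the EVAL task enums follow the project README, and the model id is a placeholder:

from transformers import AutoTokenizer

from gptqmodel import GPTQModel
from gptqmodel.utils.eval import EVAL

tok = AutoTokenizer.from_pretrained("facebook/opt-125m")  # placeholder id
results = GPTQModel.eval(
    "facebook/opt-125m",  # placeholder id
    tokenizer=tok,  # new explicit argument added in this diff
    tasks=[EVAL.LM_EVAL.ARC_CHALLENGE],
)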
@@ -316,20 +318,29 @@ def eval(
         if isinstance(model_or_id_or_path, str):
             model = None
             model_id_or_path = model_or_id_or_path
+        elif isinstance(model_or_id_or_path, BaseGPTQModel) or isinstance(model_or_id_or_path, PreTrainedModel):
+            model = model_or_id_or_path
+            model_id_or_path = model.config.name_or_path
         else:
-            model = model_or_id_or_path
-            model_id_or_path = model.model_local_path
+            raise ValueError(f"`model_or_id_or_path` is invalid. expected: `model instance or str` actual: `{model_or_id_or_path}`")
+
+        if tokenizer is None:
+            if isinstance(model, BaseGPTQModel):
+                tokenizer = model.tokenizer
+            elif isinstance(model, PreTrainedModel) or model_id_or_path.strip():
+                tokenizer = Tokenicer.load(model_id_or_path)
+
+        if tokenizer is None:
+            raise ValueError("Tokenizer: Auto-loading of tokenizer failed with `model_or_id_or_path`. Please pass in `tokenizer` as argument.")
+
+        model_args["tokenizer"] = tokenizer

         if framework == EVAL.LM_EVAL:
             for task in tasks:
                 if task not in EVAL.get_task_enums():
                     raise ValueError(f"lm_eval support tasks: {EVAL.get_all_tasks_string()}")

-            # model_id_or_path=model_id_or_path if model_id_or_path else model.model_id_or_path
-            # tokenizer = AutoTokenizer.from_pretrained(model_id_or_path, trust_remote_code=trust_remote_code)
-            tokenizer = model.tokenizer if model else AutoTokenizer.from_pretrained(model_id_or_path, trust_remote_code=trust_remote_code)
-
-            model_name = 'hf' if backend == 'gptqmodel' else backend
+            model_name = "hf" if backend == "gptqmodel" else backend

             if backend == "gptqmodel":
                 model_args["gptqmodel"] = True
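The hunk above centralizes tokenizer resolution ahead of the framework branches: an explicit tokenizer argument wins, then a BaseGPTQModel's attached tokenizer, then a Tokenicer.load() from the resolved id or path. Restated as a standalone helper for readability; this is a sketch mirroring the diff, not a function the codebase defines:

def resolve_tokenizer(model, model_id_or_path, tokenizer=None):
    # 1. A caller-supplied tokenizer always wins.
    if tokenizer is not None:
        return tokenizer
    # 2. A quantized BaseGPTQModel already carries its tokenizer.
    if isinstance(model, BaseGPTQModel):
        return model.tokenizer
    # 3. Otherwise load from the hub id or local path.
    if isinstance(model, PreTrainedModel) or model_id_or_path.strip():
        return Tokenicer.load(model_id_or_path)
    raise ValueError("Auto-loading failed; pass `tokenizer` explicitly.")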
@@ -349,13 +360,13 @@ def eval(
                 batch_size=batch_size,
                 trust_remote_code=trust_remote_code,
             )
-            apply_chat_template = args.pop("apply_chat_template", True if tokenizer.chat_template is not None else False)
+
             results = simple_evaluate(
                 model=model_name,
                 model_args=model_args,
                 tasks=[task.value for task in tasks],
                 batch_size=batch_size,
-                apply_chat_template=apply_chat_template,
+                apply_chat_template=args.pop("apply_chat_template", True if tokenizer.chat_template is not None else False),
                 gen_kwargs=args.pop("gen_kwargs", "temperature=0.0,top_k=50"),
                 random_seed=random_seed,
                 numpy_random_seed=random_seed,
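Net effect of the last hunk: the apply_chat_template temporary is inlined into the simple_evaluate() call; the default still tracks whether the tokenizer ships a chat template and remains overridable through kwargs. An end-to-end sketch of the new PreTrainedModel overload; the entry point follows the README and the model id is a placeholder:

from transformers import AutoModelForCausalLM

from gptqmodel import GPTQModel
from gptqmodel.utils.eval import EVAL

model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")  # placeholder
# No tokenizer passed: eval() auto-resolves one via
# Tokenicer.load(model.config.name_or_path) per the branch added above.
results = GPTQModel.eval(model, tasks=[EVAL.LM_EVAL.ARC_CHALLENGE])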