diff --git a/MANIFEST.in b/MANIFEST.in
index be1ee1891..fec669390 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -4,3 +4,4 @@ global-include gptqmodel_ext/**/*.cpp
 global-include gptqmodel_ext/**/*.cu
 global-include gptqmodel_ext/**/*.py
 include requirements.txt
+prune tests/
\ No newline at end of file
diff --git a/gptqmodel/models/auto.py b/gptqmodel/models/auto.py
index 02ee5f676..0760a2745 100644
--- a/gptqmodel/models/auto.py
+++ b/gptqmodel/models/auto.py
@@ -19,6 +19,7 @@
 import os
 
 from lm_eval.utils import make_table
+from tokenicer import Tokenicer
 
 if not os.environ.get("PYTORCH_CUDA_ALLOC_CONF", None):
     os.environ["PYTORCH_CUDA_ALLOC_CONF"] = 'expandable_segments:True'
@@ -42,7 +43,7 @@
 import numpy  # noqa: E402
 import torch  # noqa: E402
 from huggingface_hub import list_repo_files  # noqa: E402
-from transformers import AutoConfig,AutoTokenizer  # noqa: E402
+from transformers import AutoConfig, PreTrainedModel, PreTrainedTokenizerBase  # noqa: E402
 
 from ..quantization import QUANT_CONFIG_FILENAME  # noqa: E402
 from ..utils import BACKEND  # noqa: E402
@@ -286,7 +287,8 @@ def from_quantized(
     def eval(
             cls,
             model_or_id_or_path: str=None,
-            tasks: Union[List[EVAL.LM_EVAL], List[EVAL.EVALPLUS]] = None, # set to None to tifx mutable warning
+            tokenizer: PreTrainedTokenizerBase=None,
+            tasks: Union[List[EVAL.LM_EVAL], List[EVAL.EVALPLUS]] = None, # set to None to fix mutable warning
             framework: EVAL = EVAL.LM_EVAL,
             batch_size: int = 1,
             trust_remote_code: bool = False,
@@ -316,20 +318,29 @@ def eval(
         if isinstance(model_or_id_or_path, str):
             model = None
             model_id_or_path = model_or_id_or_path
+        elif isinstance(model_or_id_or_path, BaseGPTQModel) or isinstance(model_or_id_or_path, PreTrainedModel):
+            model = model_or_id_or_path
+            model_id_or_path = model.config.name_or_path #
         else:
-            model = model_or_id_or_path
-            model_id_or_path = model.model_local_path
+            raise ValueError(f"`model_or_id_or_path` is invalid. expected: `model instance or str` actual: `{model_or_id_or_path}`")
+
+        if tokenizer is None:
+            if isinstance(model, BaseGPTQModel):
+                tokenizer = model.tokenizer
+            elif isinstance(model, PreTrainedModel) or model_id_or_path.strip():
+                tokenizer = Tokenicer.load(model_id_or_path)
+
+        if tokenizer is None:
+            raise ValueError("Tokenizer: Auto-loading of tokenizer failed with `model_or_id_or_path`. Please pass in `tokenizer` as argument.")
+
+        model_args["tokenizer"] = tokenizer
 
         if framework == EVAL.LM_EVAL:
             for task in tasks:
                 if task not in EVAL.get_task_enums():
                     raise ValueError(f"lm_eval support tasks: {EVAL.get_all_tasks_string()}")
 
-            # model_id_or_path=model_id_or_path if model_id_or_path else model.model_id_or_path
-            # tokenizer = AutoTokenizer.from_pretrained(model_id_or_path, trust_remote_code=trust_remote_code)
-            tokenizer = model.tokenizer if model else AutoTokenizer.from_pretrained(model_id_or_path, trust_remote_code=trust_remote_code)
-
-            model_name = 'hf' if backend == 'gptqmodel' else backend
+            model_name = "hf" if backend == "gptqmodel" else backend
 
             if backend == "gptqmodel":
                 model_args["gptqmodel"] = True
@@ -349,13 +360,13 @@ def eval(
                 batch_size=batch_size,
                 trust_remote_code=trust_remote_code,
             )
-            apply_chat_template=args.pop("apply_chat_template", True if tokenizer.chat_template is not None else False)
+
             results = simple_evaluate(
                 model=model_name,
                 model_args=model_args,
                 tasks=[task.value for task in tasks],
                 batch_size=batch_size,
-                apply_chat_template=apply_chat_template,
+                apply_chat_template=args.pop("apply_chat_template", True if tokenizer.chat_template is not None else False),
                 gen_kwargs=args.pop("gen_kwargs", "temperature=0.0,top_k=50"),
                 random_seed=random_seed,
                 numpy_random_seed=random_seed,
diff --git a/gptqmodel/utils/eval.py b/gptqmodel/utils/eval.py
index 2f942c3f9..2aa080359 100644
--- a/gptqmodel/utils/eval.py
+++ b/gptqmodel/utils/eval.py
@@ -19,6 +19,7 @@
 from enum import Enum
 from typing import Optional
 
+from .evalplus import patch_evalplus
 
 class EVAL:
     class LM_EVAL(Enum):
@@ -56,13 +57,15 @@ def get_all_tasks_string(cls):
 
 
 def evalplus(
-        model: str,
+        model,
         dataset: str,
         batch: int = 1,
         trust_remote_code: bool = False,
         output_file: Optional[str] = None,
         backend: str = 'gptqmodel'
 ):
+    patch_evalplus(model)
+
     try:
         from evalplus.evaluate import evaluate
     except BaseException:
diff --git a/gptqmodel/utils/evalplus.py b/gptqmodel/utils/evalplus.py
new file mode 100644
index 000000000..368c91fa0
--- /dev/null
+++ b/gptqmodel/utils/evalplus.py
@@ -0,0 +1,79 @@
+import types
+
+from tokenicer import Tokenicer
+from transformers import PreTrainedModel
+
+
+def patch_strip(self, *args, **kwargs):
+    return self.config.name_or_path.strip(*args, **kwargs)
+
+def patch_tostring(self):
+    return self.config.name_or_path
+
+def patch_evalplus(model):
+    from ..models.base import BaseGPTQModel
+    if isinstance(model, BaseGPTQModel) or isinstance(model, PreTrainedModel):
+        model.strip = types.MethodType(patch_strip, model)
+        model.__str__ = types.MethodType(patch_tostring, model)
+
+    import torch
+    from evalplus.provider.base import DecoderBase
+    from evalplus.provider.gptqmodel import GPTQModelDecoder
+    from evalplus.provider.utility import extra_eos_for_direct_completion
+    from gptqmodel.models import BaseGPTQModel
+
+    from .. import GPTQModel
+
+    class PatchedGPTQModelDecoder(DecoderBase):
+        def __init__(
+            self,
+            name: str,
+            dataset: str,
+            gptqmodel_backend: str = 'auto',
+            force_base_prompt: bool = False,
+            **kwargs,
+        ):
+
+            super(GPTQModelDecoder, self).__init__(name=name, **kwargs)
+
+            if hasattr(torch, "mps") and hasattr(torch.mps, "is_available") and torch.mps.is_available():
+                device = torch.device("mps")
+            elif hasattr(torch, "xpu") and hasattr(torch.xpu, "is_available") and torch.xpu.is_available():
+                device = torch.device("xpu")
+            elif hasattr(torch, "cuda") and hasattr(torch.cuda, "is_available") and torch.cuda.is_available():
+                device = torch.device("cuda")
+            else:
+                device = torch.device("cpu")
+
+            self.device = device
+
+            kwargs = {
+                "model_id_or_path": name,
+                "trust_remote_code": self.trust_remote_code,
+                "backend": gptqmodel_backend,
+                "device": device
+            }
+            self.skip_special_tokens = True
+            self.force_base_prompt = force_base_prompt
+            if isinstance(name, BaseGPTQModel):
+                self.model = name
+                self.tokenizer = self.model.tokenizer
+            elif isinstance(name, PreTrainedModel):
+                self.model = name
+                self.tokenizer = Tokenicer.load(name.config.name_or_path, trust_remote_code=self.trust_remote_code)
+            elif isinstance(name, str):
+                self.tokenizer = Tokenicer.load(name, trust_remote_code=self.trust_remote_code)
+                self.model = GPTQModel.load(**kwargs)
+                self.model = self.model.to(self.device)
+            else:
+                raise ValueError(f"`name` is invalid. expected: `model instance or str` actual: `{name}`")
+
+            if self.tokenizer is None:
+                raise ValueError("Tokenizer: Auto-loading of tokenizer failed with `model_or_id_or_path`. Please pass in `tokenizer` as argument.")
+
+            if self.is_direct_completion():  # no chat template
+                self.eos += extra_eos_for_direct_completion(dataset)
+            else:  # with chat template
+                self.eos += ["\n```\n"]
+
+    GPTQModelDecoder.__init__ = PatchedGPTQModelDecoder.__init__
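
Usage sketch (not part of the patch): after this change, `GPTQModel.eval` accepts either a model id/path or an already-loaded model, plus an optional `tokenizer` that is otherwise auto-loaded via Tokenicer. The checkpoint id and the ARC_CHALLENGE task value below are illustrative assumptions, not taken from this diff.

    from transformers import AutoTokenizer

    from gptqmodel import GPTQModel
    from gptqmodel.utils.eval import EVAL

    model_id = "ModelCloud/Llama-3.2-1B-Instruct-gptqmodel-4bit"  # hypothetical checkpoint id

    # Optional: pass an explicit tokenizer; when omitted, eval() auto-loads one
    # from the model instance or from `model_or_id_or_path` via Tokenicer.
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    results = GPTQModel.eval(
        model_or_id_or_path=model_id,        # str, BaseGPTQModel, or PreTrainedModel
        tokenizer=tokenizer,
        tasks=[EVAL.LM_EVAL.ARC_CHALLENGE],  # assumed task enum member
        framework=EVAL.LM_EVAL,
        batch_size=1,
    )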