diff --git a/README.md b/README.md index 7efa343f8..2b080ab6e 100644 --- a/README.md +++ b/README.md @@ -161,6 +161,17 @@ result = model.generate("Uncovering deep insights begins with")[0] # tokens print(model.tokenizer.decode(result)) # string output ``` +To use models from [ModelScope](https://www.modelscope.cn/) instead of HuggingFace Hub, set an environment variable: +```shell +export GPTQMODEL_USE_MODELSCOPE=True +``` +```py +from gptqmodel import GPTQModel +# load Qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int4 from modelscope +model = GPTQModel.load("Qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int4") +result = model.generate("Uncovering deep insights begins with")[0] # tokens +print(model.tokenizer.decode(result)) # string output +``` ### OpenAI API compatible end-point ```py diff --git a/gptqmodel/__init__.py b/gptqmodel/__init__.py index c800c3ae9..436c1abf9 100644 --- a/gptqmodel/__init__.py +++ b/gptqmodel/__init__.py @@ -19,3 +19,8 @@ from .utils import BACKEND from .utils.exllama import exllama_set_max_input_length from .version import __version__ + +import os +if os.getenv('GPTQMODEL_USE_MODELSCOPE', 'False').lower() in ['true', '1']: + from modelscope.utils.hf_util.patcher import patch_hub + patch_hub() diff --git a/gptqmodel/models/loader.py b/gptqmodel/models/loader.py index c834162e1..871c6c200 100644 --- a/gptqmodel/models/loader.py +++ b/gptqmodel/models/loader.py @@ -23,7 +23,10 @@ import torch import transformers -from huggingface_hub import snapshot_download +if os.getenv('GPTQMODEL_USE_MODELSCOPE', 'False').lower() in ['true', '1']: + from modelscope import snapshot_download +else: + from huggingface_hub import snapshot_download from packaging.version import InvalidVersion, Version from transformers import AutoConfig, AutoTokenizer, PretrainedConfig from transformers.modeling_utils import no_init_weights diff --git a/tests/test_modelscope.py b/tests/test_modelscope.py new file mode 100644 index 000000000..3076fba68 --- /dev/null +++ 
b/tests/test_modelscope.py @@ -0,0 +1,27 @@ +import os +os.environ["GPTQMODEL_USE_MODELSCOPE"] = "True" +import sys  # noqa: E402 +import subprocess  # noqa: E402 +import importlib.util  # noqa: E402 + +if importlib.util.find_spec("modelscope") is None: + subprocess.check_call([sys.executable, "-m", "pip", "install", "modelscope", "-U"]) + +from models.model_test import ModelTest  # noqa: E402 +from gptqmodel import GPTQModel  # noqa: E402 + + +class TestLoadModelscope(ModelTest): + + @classmethod + def setUpClass(cls): + cls.MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int4" + + def test_load_modelscope(self): + model = GPTQModel.load(self.MODEL_ID) + + result = model.generate("The capital of mainland China is")[0] + str_output = model.tokenizer.decode(result) + assert "beijing" in str_output.lower() or "bei-jing" in str_output.lower() + + del model