diff --git a/gptqmodel/models/loader.py b/gptqmodel/models/loader.py index 66d383ae7..cb61e4b98 100644 --- a/gptqmodel/models/loader.py +++ b/gptqmodel/models/loader.py @@ -17,16 +17,12 @@ from __future__ import annotations import os -import time from importlib.metadata import PackageNotFoundError, version from typing import Dict, List, Optional, Union import torch import transformers -from ..nn_modules.qlinear.exllama_eora import ExllamaEoraQuantLinear -from ..nn_modules.qlinear.marlin import MarlinQuantLinear - if os.getenv('GPTQMODEL_USE_MODELSCOPE', 'False').lower() in ['true', '1']: try: from modelscope import snapshot_download @@ -46,7 +42,7 @@ from ..nn_modules.qlinear.exllamav2 import ExllamaV2QuantLinear from ..nn_modules.qlinear.ipex import IPEXQuantLinear from ..quantization import QuantizeConfig -from ..quantization.config import FORMAT, FORMAT_FIELD_JSON, MIN_VERSION_WITH_V2 +from ..quantization.config import FORMAT, MIN_VERSION_WITH_V2 from ..utils.backend import BACKEND from ..utils.importer import auto_select_device, normalize_device_device_map, select_quant_linear from ..utils.logger import setup_logger diff --git a/gptqmodel/utils/model.py b/gptqmodel/utils/model.py index 6801abb07..7a54f1b94 100644 --- a/gptqmodel/utils/model.py +++ b/gptqmodel/utils/model.py @@ -24,6 +24,7 @@ import os import re import shutil +import time from concurrent.futures import ThreadPoolExecutor from enum import Enum from typing import Any, Dict, List, Optional, Tuple, Type @@ -50,7 +51,7 @@ from ..nn_modules.qlinear.exllamav2 import ExllamaV2QuantLinear from ..nn_modules.qlinear.ipex import IPEXQuantLinear from ..quantization import FORMAT, QuantizeConfig -from ..quantization.config import dynamic_get +from ..quantization.config import FORMAT_FIELD_JSON, dynamic_get from .backend import BACKEND from .importer import select_quant_linear from .logger import setup_logger