From a144bed4013e94dbe25ef79e3642378ab959b3fc Mon Sep 17 00:00:00 2001 From: CSY-ModelCloud Date: Sat, 18 Jan 2025 09:42:07 +0800 Subject: [PATCH 1/6] check cuda v8 for marlin --- gptqmodel/nn_modules/qlinear/marlin.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/gptqmodel/nn_modules/qlinear/marlin.py b/gptqmodel/nn_modules/qlinear/marlin.py index af03bc823..1c3501f57 100644 --- a/gptqmodel/nn_modules/qlinear/marlin.py +++ b/gptqmodel/nn_modules/qlinear/marlin.py @@ -31,6 +31,8 @@ except ImportError as e: marlin_import_exception = e +HAS_CUDA_V8 = any(torch.cuda.get_device_capability(i)[0] >= 8 for i in range(torch.cuda.device_count())) + GPTQ_MARLIN_TILE = 16 GPTQ_MARLIN_MIN_THREAD_N = 64 GPTQ_MARLIN_MIN_THREAD_K = 128 @@ -307,6 +309,8 @@ def __init__(self, bits: int, group_size: int, desc_act: bool, sym: bool, infeat def validate(cls, **args) -> Tuple[bool, Optional[Exception]]: if IS_ROCM: return False, RuntimeError("marlin kernel is not supported by rocm.") + if not HAS_CUDA_V8: + return False, RuntimeError("marlin kernel requires CUDA version >= 8.") if marlin_import_exception is not None: return False, marlin_import_exception return cls._validate(**args) From d7190e344c433b2a42767b492cdeb95b16ce133f Mon Sep 17 00:00:00 2001 From: CSY-ModelCloud Date: Sat, 18 Jan 2025 09:48:28 +0800 Subject: [PATCH 2/6] check cuda 8 for installation --- setup.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/setup.py b/setup.py index e9c6224f8..c0db1aa4e 100644 --- a/setup.py +++ b/setup.py @@ -125,6 +125,8 @@ def get_version_tag() -> str: import torch # noqa: E402 if TORCH_CUDA_ARCH_LIST is None: + HAS_CUDA_V8 = any(torch.cuda.get_device_capability(i)[0] >= 8 for i in range(torch.cuda.device_count())) + got_cuda_v6 = any(torch.cuda.get_device_capability(i)[0] >= 6 for i in range(torch.cuda.device_count())) got_cuda_between_v6_and_v8 = any(6 <= torch.cuda.get_device_capability(i)[0] < 8 for i in range(torch.cuda.device_count())) @@ -139,7 +141,8 @@ def get_version_tag() -> str: if BUILD_CUDA_EXT and not FORCE_BUILD: if got_cuda_between_v6_and_v8: FORCE_BUILD = True - +else: + HAS_CUDA_V8 = len([arch for arch in TORCH_CUDA_ARCH_LIST.split() if float(arch.split('+')[0]) >= 8]) > 0 if RELEASE_MODE == "1": common_setup_kwargs["version"] += f"+{get_version_tag()}" @@ -217,21 +220,20 @@ def get_version_tag() -> str: ), ] - if sys.platform != "win32": - # TODO: VC++: fatal error C1061: compiler limit : blocks nested too deeply - marlin_kernel = cpp_ext.CUDAExtension( - "gptqmodel_marlin_kernels", - [ - "gptqmodel_ext/marlin/marlin_cuda.cpp", - "gptqmodel_ext/marlin/marlin_cuda_kernel.cu", - "gptqmodel_ext/marlin/marlin_repack.cu", - ], - extra_link_args=extra_link_args, - extra_compile_args=extra_compile_args, - ) + if sys.platform != "win32":# TODO: VC++: fatal error C1061: compiler limit : blocks nested too deeply # https://rocm.docs.amd.com/projects/HIPIFY/en/docs-6.1.0/tables/CUDA_Device_API_supported_by_HIP.html # nv_bfloat16 and nv_bfloat162 (2x bf16) missing replacement in ROCm - if not ROCM_VERSION: + if HAS_CUDA_V8 and not ROCM_VERSION: + marlin_kernel = cpp_ext.CUDAExtension( + "gptqmodel_marlin_kernels", + [ + "gptqmodel_ext/marlin/marlin_cuda.cpp", + "gptqmodel_ext/marlin/marlin_cuda_kernel.cu", + "gptqmodel_ext/marlin/marlin_repack.cu", + ], + extra_link_args=extra_link_args, + extra_compile_args=extra_compile_args, + ) extensions.append(marlin_kernel) extensions += [ # TODO: VC++: error lnk2001 unresolved external symbol cublasHgemm From a59d222d5dd8f08057e85b0139008f8eb1a27014 Mon Sep 17 00:00:00 2001 From: CSY-ModelCloud Date: Sat, 18 Jan 2025 09:53:49 +0800 Subject: [PATCH 3/6] update msg --- gptqmodel/nn_modules/qlinear/marlin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gptqmodel/nn_modules/qlinear/marlin.py b/gptqmodel/nn_modules/qlinear/marlin.py index 1c3501f57..5cf947157 100644 --- a/gptqmodel/nn_modules/qlinear/marlin.py +++ b/gptqmodel/nn_modules/qlinear/marlin.py @@ -310,7 +310,7 @@ def validate(cls, **args) -> Tuple[bool, Optional[Exception]]: if IS_ROCM: return False, RuntimeError("marlin kernel is not supported by rocm.") if not HAS_CUDA_V8: - return False, RuntimeError("marlin kernel requires CUDA version >= 8.") + return False, RuntimeError("marlin kernel requires Compute Capability >= 8.0.") if marlin_import_exception is not None: return False, marlin_import_exception return cls._validate(**args) From b164e6d4f02eea8eba5e63026964943c1f044eff Mon Sep 17 00:00:00 2001 From: CSY-ModelCloud Date: Sat, 18 Jan 2025 09:56:30 +0800 Subject: [PATCH 4/6] update skip marlin msg --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index c0db1aa4e..62f136b73 100644 --- a/setup.py +++ b/setup.py @@ -235,6 +235,8 @@ def get_version_tag() -> str: extra_compile_args=extra_compile_args, ) extensions.append(marlin_kernel) + elif not HAS_CUDA_V8: + print(f"marlin kernel only supports compute capability >= 8.0, there's no such cuda device, skipped.") extensions += [ # TODO: VC++: error lnk2001 unresolved external symbol cublasHgemm cpp_ext.CUDAExtension( From b44b227ed511eb25e051b6b34d05771c7f01da3e Mon Sep 17 00:00:00 2001 From: CSY-ModelCloud Date: Sat, 18 Jan 2025 10:25:05 +0800 Subject: [PATCH 5/6] check rocm first --- gptqmodel/nn_modules/qlinear/marlin.py | 3 +-- setup.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/gptqmodel/nn_modules/qlinear/marlin.py b/gptqmodel/nn_modules/qlinear/marlin.py index 5cf947157..0276dde63 100644 --- a/gptqmodel/nn_modules/qlinear/marlin.py +++ b/gptqmodel/nn_modules/qlinear/marlin.py @@ -31,7 +31,6 @@ except ImportError as e: marlin_import_exception = e -HAS_CUDA_V8 = any(torch.cuda.get_device_capability(i)[0] >= 8 for i in range(torch.cuda.device_count())) GPTQ_MARLIN_TILE = 16 GPTQ_MARLIN_MIN_THREAD_N = 64 @@ -309,7 +308,7 @@ def __init__(self, bits: int, group_size: int, desc_act: bool, sym: bool, infeat def validate(cls, **args) -> Tuple[bool, Optional[Exception]]: if IS_ROCM: return False, RuntimeError("marlin kernel is not supported by rocm.") - if not HAS_CUDA_V8: + if not any(torch.cuda.get_device_capability(i)[0] >= 8 for i in range(torch.cuda.device_count())): return False, RuntimeError("marlin kernel requires Compute Capability >= 8.0.") if marlin_import_exception is not None: return False, marlin_import_exception diff --git a/setup.py b/setup.py index 62f136b73..041035011 100644 --- a/setup.py +++ b/setup.py @@ -142,7 +142,7 @@ def get_version_tag() -> str: if got_cuda_between_v6_and_v8: FORCE_BUILD = True else: - HAS_CUDA_V8 = len([arch for arch in TORCH_CUDA_ARCH_LIST.split() if float(arch.split('+')[0]) >= 8]) > 0 + HAS_CUDA_V8 = ROCM_VERSION and len([arch for arch in TORCH_CUDA_ARCH_LIST.split() if float(arch.split('+')[0]) >= 8]) > 0 if RELEASE_MODE == "1": common_setup_kwargs["version"] += f"+{get_version_tag()}" From 7d749b3021a63fed5aee1e7fcc400cdab69d1f12 Mon Sep 17 00:00:00 2001 From: CSY-ModelCloud Date: Sat, 18 Jan 2025 10:26:49 +0800 Subject: [PATCH 6/6] check not ROCM_VERSION --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 041035011..f48170368 100644 --- a/setup.py +++ b/setup.py @@ -142,7 +142,7 @@ def get_version_tag() -> str: if got_cuda_between_v6_and_v8: FORCE_BUILD = True else: - HAS_CUDA_V8 = ROCM_VERSION and len([arch for arch in TORCH_CUDA_ARCH_LIST.split() if float(arch.split('+')[0]) >= 8]) > 0 + HAS_CUDA_V8 = not ROCM_VERSION and len([arch for arch in TORCH_CUDA_ARCH_LIST.split() if float(arch.split('+')[0]) >= 8]) > 0 if RELEASE_MODE == "1": common_setup_kwargs["version"] += f"+{get_version_tag()}"