Skip to content

Commit 6928064

Browse files
authored
Bitblas cache (#129)
* cleanup * revert bad commit * key bitblas cache to both bitblas and gptqmodel version
1 parent b0e8a58 commit 6928064

File tree

3 files changed

+9
-2
lines changed

3 files changed

+9
-2
lines changed

gptqmodel/nn_modules/qlinear/qlinear_bitblas.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,13 @@ def import_bitblas():
4646

4747
if BITBLAS_DATABASE_PATH is None:
4848
from bitblas.cache import get_database_path
49-
BITBLAS_DATABASE_PATH = get_database_path()
49+
from importlib.metadata import version
50+
51+
bitblas_version = version(distribution_name="bitblas")
52+
gptqmodel_version = version(distribution_name="gptqmodel")
53+
54+
# for stability, tvm compiled caches are stored keyed by both bitblas and gptqmodel version
55+
BITBLAS_DATABASE_PATH = f"{get_database_path()}_v{bitblas_version}_gptqmodel_v{gptqmodel_version}"
5056

5157

5258
def unpack_qzeros(qzeros, bits):

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,4 @@ threadpoolctl>=3.5.0
1313
packaging>=24.1
1414
ninja>=1.11.1.1
1515
bitblas>=0.0.1.dev12
16+
importlib>=1.0.4

tests/test_q4_bitblas.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import unittest # noqa: E402
88

99
import torch # noqa: E402
10-
from gptqmodel.nn_modules.qlinear.qlinear_bitblas import QuantLinear as BitBLASQuantLinear # noqa: E402
10+
from gptqmodel.nn_modules.qlinear.qlinear_bitblas import BitBLASQuantLinear # noqa: E402
1111

1212
try:
1313
from gptqmodel_exllama_kernels import prepare_buffers, set_tuning_params # noqa: F401

0 commit comments

Comments
 (0)