Skip to content

Commit 024e167

Browse files
add 3 bit test
1 parent c253dde commit 024e167

File tree

1 file changed

+14
-1
lines changed

1 file changed

+14
-1
lines changed

tests/test_q4_cuda.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,16 @@
1616

1717
# -- do not touch
1818
import os
19+
import tempfile
20+
21+
from gptqmodel.utils import Perplexity
1922

2023
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
2124
# -- end do not touch
2225

2326

2427
import torch # noqa: E402
25-
from gptqmodel import BACKEND, GPTQModel # noqa: E402
28+
from gptqmodel import BACKEND, GPTQModel, QuantizeConfig # noqa: E402
2629
from models.model_test import ModelTest # noqa: E402
2730
from parameterized import parameterized # noqa: E402
2831
from transformers import AutoTokenizer # noqa: E402
@@ -75,3 +78,13 @@ def test_generation_desc_act_false(self, torch_dtype, device):
7578
# This one does not.
7679
self.assertInference(model=model_q.model,tokenizer=self.tokenizer)
7780

81+
82+
def test_3bit(self):
    """Run the quantized lm-eval check with a 3-bit quantization setting.

    Overrides the per-test settings on this instance and then hands off to
    ``self.quant_lm_eval()``.
    NOTE(review): the attributes are presumably read by ``quant_lm_eval``
    on the base test class -- confirm against ``ModelTest``.
    """
    # Applied in this exact order; each entry becomes an instance attribute.
    overrides = (
        # Local checkpoint path of the unquantized model.
        ("NATIVE_MODEL_ID", "/monster/data/model/Llama-3.2-1B-Instruct"),
        # Reference ARC-challenge numbers for the unquantized model.
        ("NATIVE_ARC_CHALLENGE_ACC", 0.3567),
        ("NATIVE_ARC_CHALLENGE_ACC_NORM", 0.3805),
        # NOTE(review): looks like the allowed downward deviation from the
        # reference scores -- confirm units/meaning in ModelTest.
        ("QUANT_ARC_MAX_DELTA_FLOOR_PERCENT", 0.40),
        # The setting this test exists for: quantize with 3 bits.
        ("QUANTIZE_CONFIG_BITS", 3),
    )
    for attr_name, attr_value in overrides:
        setattr(self, attr_name, attr_value)

    self.quant_lm_eval()

0 commit comments

Comments
 (0)