
Commit d5cb567

Support fp16 models for weight-only quantization in the PyTorch framework (#1387)

Signed-off-by: Cheng, Penghui <[email protected]>

Parent: d81269d
2 files changed (+7, -1 lines)
neural_compressor/adaptor/torch_utils/weight_only.py (+5)

@@ -399,6 +399,9 @@ def rtn_quantize(
     for name, m in model.named_modules():
         if m.__class__.__name__ not in supported_layers:
             continue
+        orig_dtype = next(m.parameters()).dtype
+        if orig_dtype != torch.float:
+            m = m.float()
         if name in weight_config:  # pragma: no cover
             num_bits = weight_config[name]["bits"]
             group_size = weight_config[name]["group_size"]
@@ -466,6 +469,8 @@ def rtn_quantize(
         )
         q_weight = q_weight.T if group_dim == 0 else q_weight
         m.weight.data.copy_(q_weight)
+        if orig_dtype != torch.float:
+            m = m.to(orig_dtype)
     return model

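The fix follows a cast-quantize-restore pattern: each supported layer's original dtype is recorded, the layer is cast to fp32 so the RTN rounding math runs in full precision, and after the quantized weights are copied back the layer is restored to its incoming dtype. A minimal sketch of that pattern (quantize_fn here is a hypothetical stand-in for the RTN rounding step, not a Neural Compressor API):

    import torch

    def quantize_in_fp32(m: torch.nn.Module, quantize_fn) -> torch.nn.Module:
        # Remember the incoming dtype (e.g. torch.float16) so it can be restored.
        orig_dtype = next(m.parameters()).dtype
        if orig_dtype != torch.float:
            m = m.float()             # do the rounding math in full precision
        quantize_fn(m)                # e.g. round weights to a low-bit grid in place
        if orig_dtype != torch.float:
            m = m.to(orig_dtype)      # hand back an fp16/bf16 layer
        return m
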
test/quantization/test_weight_only_quantization.py (+2, -1)

@@ -54,6 +54,7 @@ def test_trace(self):

     def test_rtn(self):
         fp32_model = copy.deepcopy(self.model)
+        fp16_model = copy.deepcopy(self.model).to(torch.float16)
         model1 = rtn_quantize(fp32_model, num_bits=3, group_size=-1)
         self.assertTrue(isinstance(model1.fc1, torch.nn.Linear))
         weight_config = {
@@ -67,7 +68,7 @@ def test_rtn(self):
             },
         }
         model2 = rtn_quantize(fp32_model, weight_config=weight_config)
-        model2 = rtn_quantize(fp32_model, weight_config=weight_config, return_int=True)
+        model2 = rtn_quantize(fp16_model, weight_config=weight_config, return_int=True)
         self.assertTrue(isinstance(model2.fc1, WeightOnlyLinear))

     def test_awq(self):
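
With this change an fp16 model can be passed to rtn_quantize directly, as the updated test exercises. A usage sketch mirroring the calls in the test above (the toy two-layer model is illustrative; the import path follows the file changed in this commit):

    import copy
    import torch
    from neural_compressor.adaptor.torch_utils.weight_only import rtn_quantize

    fp16_model = torch.nn.Sequential(
        torch.nn.Linear(32, 32),
        torch.nn.Linear(32, 2),
    ).to(torch.float16)

    # Weights are fake-quantized in fp32 internally, then each Linear layer
    # is cast back to fp16 before the model is returned.
    q_model = rtn_quantize(copy.deepcopy(fp16_model), num_bits=3, group_size=-1)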
