Skip to content

Commit b2f1f20

Browse files
committed
Enable model.to(device) for int8 weight only quantized model
Summary: Fix implementation issues so that `int8_wo_quantized_model.to(device)` works. Test Plan: python test/quantization/test_quant_api.py -k test_quantized_model_to_device Reviewers: Subscribers: Tasks: Tags:
1 parent 12ac498 commit b2f1f20

File tree

2 files changed

+19
-3
lines changed

2 files changed

+19
-3
lines changed

test/quantization/test_quant_api.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -619,6 +619,19 @@ def test_quantized_tensor_subclass_save_load(self):
619619
res = m_copy(*example_inputs)
620620
self.assertEqual(res, ref)
621621

622+
@unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
def test_quantized_model_to_device(self):
    """Check that an int8 weight-only quantized model survives ``.to("cuda")``.

    The model is quantized and evaluated on CPU to obtain a reference
    output, then moved to CUDA and evaluated on the same inputs. The CUDA
    output, copied back to CPU, must equal the reference.

    Skipped when no CUDA device is available, since the test moves both
    the model and its inputs to ``"cuda"``.
    """
    m = ToyLinearModel().eval().to(torch.bfloat16)
    example_inputs = m.example_inputs(dtype=torch.bfloat16, device="cpu")

    # Reference result: quantized model running on CPU.
    quantize_(m, int8_weight_only())
    ref = m(*example_inputs)

    # Move the inputs and the already-quantized model to CUDA, then re-run.
    example_inputs_cuda = (example_inputs[0].to("cuda"),)
    m.to(device="cuda")
    cuda_res = m(*example_inputs_cuda)
    self.assertEqual(cuda_res.cpu(), ref)
634+
622635

623636
if __name__ == "__main__":
624637
unittest.main()

torchao/dtypes/affine_quantized_tensor.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -259,8 +259,11 @@ def _get_to_kwargs(self, *args, **kwargs):
259259

260260
def to(self, *args, **kwargs):
261261
kwargs = self._get_to_kwargs(*args, **kwargs)
262+
device = kwargs.pop("device")
263+
# not supported yet
264+
kwargs.pop("memory_format")
262265
return self.__class__(
263-
self.layout_tensor.to(kwargs["device"]),
266+
self.layout_tensor.to(device),
264267
self.block_size,
265268
self.shape,
266269
self.quant_min,
@@ -470,8 +473,8 @@ def to(self, *args, **kwargs):
470473
if device != "cuda" or (isinstance(device, torch.device) and device.type != "cuda"):
471474
raise ValueError(f"TensorCoreTiledAQTLayout is only available for cuda device")
472475
return self.__class__(
473-
self.packed_weight.to(kwargs["device"]),
474-
self.scale_and_zero.to(kwargs["device"]),
476+
self.packed_weight.to(device),
477+
self.scale_and_zero.to(device),
475478
self.transposed
476479
)
477480

0 commit comments

Comments
 (0)