Skip to content

Commit a11e455

Browse files
committed
Fix CI
1 parent 93e68e6 commit a11e455

File tree

4 files changed

+15
-5
lines changed

4 files changed

+15
-5
lines changed

test/quantization/test_quant_primitives.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
 from torchao.utils import (
     TORCH_VERSION_AFTER_2_3,
     TORCH_VERSION_AFTER_2_4,
+    TORCH_VERSION_AFTER_2_5,
     is_fbcode,
 )

@@ -99,7 +100,8 @@ def _groupwise_affine_quantize_tensor_from_qparams(
         .to(torch.int32)
         .reshape_as(w)
     )
-    w_int4x8 = (w_int4x8[::, ::2] << 4 | w_int4x8[::, 1::2]).to(torch.uint8)
+    if TORCH_VERSION_AFTER_2_5:
+        w_int4x8 = (w_int4x8[::, ::2] << 4 | w_int4x8[::, 1::2]).to(torch.uint8)

     return w_int4x8

torchao/dtypes/affine_quantized_tensor.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
 )
 from typing import ClassVar
 from dataclasses import dataclass
+from torchao.utils import TORCH_VERSION_AFTER_2_5

 aten = torch.ops.aten

@@ -532,8 +533,11 @@ def from_plain(
         layout_type: LayoutType
     ):
         assert isinstance(layout_type, TensorCoreTiledLayoutType)
-        int_data = (int_data[::, ::2] << 4 | int_data[::, 1::2]).to(torch.uint8)
-        assert int_data.dtype == torch.uint8, "torch.ops.aten._convert_weight_to_int4pack expects `uint8` dtype"
+        if TORCH_VERSION_AFTER_2_5:
+            int_data = (int_data[::, ::2] << 4 | int_data[::, 1::2]).to(torch.uint8)
+            assert int_data.dtype == torch.uint8, "torch.ops.aten._convert_weight_to_int4pack in torch 2.5 expects `uint8` dtype"
+        else:
+            assert int_data.dtype == torch.int32, "torch.ops.aten._convert_weight_to_int4pack in torch 2.4 expects `int32` dtype"
         packed_weight = torch.ops.aten._convert_weight_to_int4pack(int_data, layout_type.inner_k_tiles)
         scale = scale.reshape(int_data.shape[0], -1)
         zero_point = zero_point.reshape(int_data.shape[0], -1)

torchao/prototype/hqq/hqq_tinygemm_linear.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
 from hqq.core.utils import *

 import torch.nn.functional as F
+from torchao.utils import TORCH_VERSION_AFTER_2_5


 class HQQLinearTorchWeightOnlyInt4(torch.nn.Module):
@@ -198,7 +199,8 @@ def hqq_quants_to_torch_quants(
         .reshape(shape)
         .contiguous()
     )
-    W_q = (W_q[::, ::2] << 4 | W_q[::, 1::2]).to(torch.uint8)
+    if TORCH_VERSION_AFTER_2_5:
+        W_q = (W_q[::, ::2] << 4 | W_q[::, 1::2]).to(torch.uint8)

     # group_dequantize_tensor_from_qparams
     # W_r = W_q*scales + min_val

torchao/quantization/utils.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
     dequantize_affine,
     int_scaled_matmul,
 )
+from torchao.utils import TORCH_VERSION_AFTER_2_5

 __all__ = [
     "compute_error",
@@ -349,7 +350,8 @@ def groupwise_affine_quantize_tensor_from_qparams(
     quant_max = 2 ** n_bit - 1

     int_data = quantize_affine(w, block_size, scales, zeros, output_dtype, quant_min, quant_max, zero_point_domain = ZeroPointDomain.FLOAT)
-    int_data = (int_data[::, ::2] << 4 | int_data[::, 1::2]).to(torch.uint8)
+    if TORCH_VERSION_AFTER_2_5:
+        int_data = (int_data[::, ::2] << 4 | int_data[::, 1::2]).to(torch.uint8)
     return int_data

 def groupwise_affine_dequantize_tensor_from_qparams(

0 commit comments

Comments (0)