unify NPU and XPU test cases into a single class

orangeH25 · orangeH25 · commit 2c7b199910fc · 2025-11-03T07:59:27.000Z
diff --git a/test/quantization/quantize_/workflows/int4/test_int4_plain_int32_tensor.py b/test/quantization/quantize_/workflows/int4/test_int4_plain_int32_tensor.py
@@ -4,6 +4,7 @@
 # This source code is licensed under the BSD 3-Clause license found in the
 # LICENSE file in the root directory of this source tree.
 
+import pytest
 import tempfile
 import unittest
 
@@ -33,103 +34,88 @@ def get_config(group_size):
     )
 
 
-@unittest.skipIf(not torch_version_at_least("2.8.0"), "Need pytorch 2.8+")
-@unittest.skipIf(not torch.xpu.is_available(), "XPU not available")
-class Int4PlainInt32TensorXPU(TestCase):
-    @parametrize(
-        "sizes",
-        [
-            ((128,), 256, 128),
-            ((32, 128), 512, 128),
-            ((2, 32, 128), 256, 12),
-        ],
-    )
-    @parametrize("dtype", [torch.bfloat16, torch.half])
-    @parametrize("group_size", [32, 64, 128])
-    def test_linear(self, sizes, dtype, group_size):
-        device = "xpu"
-        M, N, K = sizes
-        input = torch.randn(*M, K, dtype=dtype, device=device)
-        linear = torch.nn.Linear(K, N, dtype=dtype, device=device)
-        original = linear(input)
-        quantize_(linear, get_config(group_size))
-        quantized = linear(input)
-        self.assertTrue(compute_error(original, quantized) > 20)
+_MIN_VER = {
+    "xpu": "2.8.0",
+    "npu": "2.7.1",
+}
+THRESHOLD = {"xpu": 20, "npu": 10}
 
-        compiled_linear = torch.compile(linear)
-        quantized_and_compiled = compiled_linear(input)
-        self.assertTrue(compute_error(original, quantized_and_compiled) > 20)
+ALL_DEVICES = ("xpu", "npu")
 
-    @parametrize("dtype", [torch.bfloat16, torch.half])
-    def test_module_path(self, dtype):
-        linear = torch.nn.Linear(128, 256, dtype=dtype, device="xpu")
-        quantize_(linear, get_config(group_size=128))
-        self.assertEqual(
-            str(type(linear.weight)),
-            "<class 'torchao.quantization.Int4PlainInt32Tensor'>",
-        )
 
-        with tempfile.NamedTemporaryFile() as f:
-            torch.save(linear.state_dict(), f)
-            f.seek(0)
-            state_dict = torch.load(f)
-            self.assertEqual(
-                str(type(state_dict["weight"])),
-                "<class 'torchao.quantization.Int4PlainInt32Tensor'>",
-            )
+def _get_available_devices() -> tuple[list[str], list[str]]:
+    available_devices = []
+    messages = []
+    for name in ALL_DEVICES:
+        mod = getattr(torch, name, None)
+        if mod is None:
+            messages.append(f"{name}: not found in torch")
+            continue
+        avail = mod.is_available()
+        status = []
+        status.append(f"available={avail}")
+        status.append(f"min_version_req={_MIN_VER[name]}")
+        status.append(f"torch_version={torch.__version__}")
+        if avail and torch_version_at_least(_MIN_VER[name]):
+            available_devices.append(name)
+            status.append("OK")
+        else:
+            status.append("FAIL")
+        messages.append(f"{name}: " + ", ".join(status))
 
-    def test_activation_prescaling(self):
-        dtype = torch.bfloat16
-        device = "xpu"
-        input = torch.randn(1, 128, dtype=dtype, device=device)
-        linear = torch.nn.Linear(128, 256, bias=False, dtype=dtype, device=device)
-        original = linear(input)
-        quantize_(linear, get_config(128))
-        qw = linear.weight
-        assert isinstance(qw, SupportsActivationPreScaling), (
-            "Expected int4 tensor supports activation prescaling"
-        )
-        assert qw.act_pre_scale is None, "Default `act_pre_scale` is None"
-        _ACT_PRE_SCALE = 2
-        qw.act_pre_scale = _ACT_PRE_SCALE
-        quantized = linear(input)
+    return available_devices, messages
 
-        # making sure activation pre scaling is successfully applied to the activation
-        self.assertTrue(compute_error(original * _ACT_PRE_SCALE, quantized) > 20)
+
+AVAILABLE_DEVICES, MESSAGES = _get_available_devices()
+print("\nDevice Status:")
+for msg in MESSAGES:
+    print("  ", msg)
 
 
-@unittest.skipIf(not torch_version_at_least("2.7.1"), "Need pytorch 2.7.1+")
 @unittest.skipIf(
-    torch.accelerator.current_accelerator().type != "npu"
-    or not torch.accelerator.is_available(),
-    "NPU not available",
+    not AVAILABLE_DEVICES, f"No available devices: {', '.join(ALL_DEVICES)}"
 )
-class Int4PlainInt32TensorNPU(TestCase):
-    @parametrize("device", ["npu"])
+class Int4PlainInt32Tensor(TestCase):
+    @parametrize("device", AVAILABLE_DEVICES)
     @parametrize(
         "sizes",
         [
             ((128,), 256, 128),
             ((32, 128), 512, 128),
-            ((2, 32, 128), 256, 128),
+            ((2, 32, 128), 256, 12),
         ],
     )
-    @parametrize("dtype", [torch.float16, torch.bfloat16])
-    @parametrize("group_size", [32, 64])
+    @parametrize("dtype", [torch.bfloat16, torch.half])
+    @parametrize("group_size", [32, 64, 128])
     def test_linear(self, device, sizes, dtype, group_size):
         M, N, K = sizes
+        if device == "npu" and group_size == K:
+            pytest.skip(
+                f"{device} does not support group_size equal to K dimension ({group_size} == {K})"
+            )
+        threshold = THRESHOLD.get(device)
+
         input = torch.randn(*M, K, dtype=dtype, device=device)
         linear = torch.nn.Linear(K, N, dtype=dtype, device=device)
-        orig_output = linear(input)
+        original = linear(input)
         quantize_(linear, get_config(group_size))
-        quantized_output = linear(input)
-        self.assertTrue(compute_error(orig_output, quantized_output) > 10)
+        quantized = linear(input)
+        self.assertTrue(compute_error(original, quantized) > threshold)
 
-    @parametrize("device", ["npu"])
-    @parametrize("dtype", [torch.float16, torch.bfloat16])
+        if device == "xpu":
+            compiled_linear = torch.compile(linear)
+            quantized_and_compiled = compiled_linear(input)
+            self.assertTrue(compute_error(original, quantized_and_compiled) > threshold)
+
+    @parametrize("device", AVAILABLE_DEVICES)
+    @parametrize("dtype", [torch.bfloat16, torch.half])
     def test_module_path(self, device, dtype):
-        linear = torch.nn.Linear(128, 256, dtype=dtype, device=device)
-        quantize_(linear, get_config(group_size=64))
+        K, N, group_size = 128, 256, 128
+        if device == "npu":
+            group_size = 64
+
+        linear = torch.nn.Linear(K, N, dtype=dtype, device=device)
+        quantize_(linear, get_config(group_size))
         self.assertEqual(
             str(type(linear.weight)),
             "<class 'torchao.quantization.Int4PlainInt32Tensor'>",
@@ -144,13 +130,21 @@ def test_module_path(self, device, dtype):
                 "<class 'torchao.quantization.Int4PlainInt32Tensor'>",
             )
 
-    @parametrize("device", ["npu"])
+    @parametrize("device", AVAILABLE_DEVICES)
     @parametrize("dtype", [torch.float16, torch.bfloat16])
     def test_activation_prescaling(self, device, dtype):
+        if device == "xpu" and dtype == torch.float16:
+            pytest.skip(f"{device} test_activation_prescaling don't test {dtype}")
+
+        threshold = THRESHOLD.get(device)
+        K, N, group_size = 128, 256, 128
+        if device == "npu":
+            group_size = 64
+
         input = torch.randn(1, 128, dtype=dtype, device=device)
         linear = torch.nn.Linear(128, 256, bias=False, dtype=dtype, device=device)
         original = linear(input)
-        quantize_(linear, get_config(64))
+        quantize_(linear, get_config(group_size))
         qw = linear.weight
         assert isinstance(qw, SupportsActivationPreScaling), (
             "Expected int4 tensor supports activation prescaling"
@@ -161,11 +155,11 @@ def test_activation_prescaling(self, device, dtype):
         quantized = linear(input)
 
         # making sure activation pre scaling is successfully applied to the activation
-        self.assertTrue(compute_error(original * _ACT_PRE_SCALE, quantized) > 10)
+        self.assertTrue(compute_error(original * _ACT_PRE_SCALE, quantized) > threshold)
+
 
+instantiate_parametrized_tests(Int4PlainInt32Tensor)
 
-instantiate_parametrized_tests(Int4PlainInt32TensorXPU)
-instantiate_parametrized_tests(Int4PlainInt32TensorNPU)
 
 if __name__ == "__main__":
     run_tests()