unify NPU and XPU test cases into a single class

orangeH25 · orangeH25 · commit 6b71049cf124 · 2025-11-04T06:29:09.000Z
diff --git a/test/quantization/quantize_/workflows/int4/test_int4_plain_int32_tensor.py b/test/quantization/quantize_/workflows/int4/test_int4_plain_int32_tensor.py
@@ -4,14 +4,13 @@
 # This source code is licensed under the BSD 3-Clause license found in the
 # LICENSE file in the root directory of this source tree.
 
-import pytest
 import tempfile
-import unittest
 
+import pytest
 import torch
+from torch.testing._internal.common_device_type import instantiate_device_type_tests
 from torch.testing._internal.common_utils import (
     TestCase,
-    instantiate_parametrized_tests,
     parametrize,
     run_tests,
 )
@@ -34,49 +33,22 @@ def get_config(group_size):
     )
 
 
+_ALL_DEVICES = ("xpu", "npu")  # device types passed as only_for= when instantiating the tests
 _MIN_VER = {
     "xpu": "2.8.0",
     "npu": "2.7.1",
 }
-THRESHOLD = {"xpu": 20, "npu": 10}
-
-ALL_DEVICES = ("xpu", "npu")
-
-
-def _get_available_devices() -> tuple[list[str], list[str]]:
-    available_devices = []
-    messages = []
-    for name in ALL_DEVICES:
-        mod = getattr(torch, name, None)
-        if mod is None:
-            messages.append(f"{name}: not found in torch")
-            continue
-        avail = mod.is_available()
-        status = []
-        status.append(f"available={avail}")
-        status.append(f"min_version_req={_MIN_VER[name]}")
-        status.append(f"torch_version={torch.__version__}")
-        if avail and torch_version_at_least(_MIN_VER[name]):
-            available_devices.append(name)
-            status.append("OK")
-        else:
-            status.append("FAIL")
-        messages.append(f"{name}: " + ", ".join(status))
-
-    return available_devices, messages
-
-
-AVAILABLE_DEVICES, MESSAGES = _get_available_devices()
-print("\nDevice Status:")
-for msg in MESSAGES:
-    print("  ", msg)
-
-
-@unittest.skipIf(
-    not AVAILABLE_DEVICES, f"No available devices: {', '.join(ALL_DEVICES)}"
-)
+_THRESHOLD = {"xpu": 20, "npu": 10}  # per-device value compute_error(...) must exceed in the asserts
+
+
 class Int4PlainInt32Tensor(TestCase):
-    @parametrize("device", AVAILABLE_DEVICES)
+    def setUp(self):
+        super().setUp()  # run the base TestCase setUp that this override replaces
+        min_req = _MIN_VER.get(self.device_type)  # minimum torch version for this device type
+        if not torch_version_at_least(min_req):
+            msg = f"{self.device_type} requires torch >= {min_req}, current {torch.__version__}"
+            self.skipTest(msg)
+
     @parametrize(
         "sizes",
         [
@@ -87,13 +59,14 @@ class Int4PlainInt32Tensor(TestCase):
     )
     @parametrize("dtype", [torch.bfloat16, torch.half])
     @parametrize("group_size", [32, 64, 128])
-    def test_linear(self, device, sizes, dtype, group_size):
+    def test_linear(self, sizes, dtype, group_size):
+        device = self.device_type
         M, N, K = sizes
         if device == "npu" and group_size == K:
             pytest.skip(
                 f"{device} does not support group_size equal to K dimension ({group_size} == {K})"
             )
-        threshold = THRESHOLD.get(device)
+        threshold = _THRESHOLD.get(device)
 
         input = torch.randn(*M, K, dtype=dtype, device=device)
         linear = torch.nn.Linear(K, N, dtype=dtype, device=device)
@@ -107,9 +80,9 @@ def test_linear(self, device, sizes, dtype, group_size):
             quantized_and_compiled = compiled_linear(input)
             self.assertTrue(compute_error(original, quantized_and_compiled) > threshold)
 
-    @parametrize("device", AVAILABLE_DEVICES)
     @parametrize("dtype", [torch.bfloat16, torch.half])
-    def test_module_path(self, device, dtype):
+    def test_module_path(self, dtype):
+        device = self.device_type
         K, N, group_size = 128, 256, 128
         if device == "npu":
             group_size = 64
@@ -130,19 +103,19 @@ def test_module_path(self, device, dtype):
                 "<class 'torchao.quantization.Int4PlainInt32Tensor'>",
             )
 
-    @parametrize("device", AVAILABLE_DEVICES)
     @parametrize("dtype", [torch.float16, torch.bfloat16])
-    def test_activation_prescaling(self, device, dtype):
+    def test_activation_prescaling(self, dtype):
+        device = self.device_type
         if device == "xpu" and dtype == torch.float16:
             pytest.skip(f"{device} test_activation_prescaling don't test {dtype}")
 
-        threshold = THRESHOLD.get(device)
+        threshold = _THRESHOLD.get(device)
         K, N, group_size = 128, 256, 128
         if device == "npu":
             group_size = 64
 
-        input = torch.randn(1, 128, dtype=dtype, device=device)
-        linear = torch.nn.Linear(128, 256, bias=False, dtype=dtype, device=device)
+        input = torch.randn(1, K, dtype=dtype, device=device)
+        linear = torch.nn.Linear(K, N, bias=False, dtype=dtype, device=device)
         original = linear(input)
         quantize_(linear, get_config(group_size))
         qw = linear.weight
@@ -158,7 +131,9 @@ def test_activation_prescaling(self, device, dtype):
         self.assertTrue(compute_error(original * _ACT_PRE_SCALE, quantized) > threshold)
 
 
-instantiate_parametrized_tests(Int4PlainInt32Tensor)
+instantiate_device_type_tests(
+    Int4PlainInt32Tensor, globals(), only_for=_ALL_DEVICES, allow_xpu=True
+)  # replaces instantiate_parametrized_tests; presumably emits per-device test classes into globals() (confirm)
 
 
 if __name__ == "__main__":