@@ -33,17 +33,14 @@ def get_config(group_size):
     )


-_ALL_DEVICES = ("xpu", "npu")
-_MIN_VER = {
-    "xpu": "2.8.0",
-    "npu": "2.7.1",
-}
-_THRESHOLD = {"xpu": 20, "npu": 10}
-
-
 class Int4PlainInt32Tensor(TestCase):
+    _MIN_VER = {
+        "xpu": "2.8.0",
+        "npu": "2.7.1",
+    }
+
     def setUp(self):
-        min_req = _MIN_VER.get(self.device_type)
+        min_req = type(self)._MIN_VER.get(self.device_type)
         if not torch_version_at_least(min_req):
             self.skipTest(
                 f"{self.device_type} requires torch >= {min_req}, current {torch.__version__}"
@@ -59,14 +56,14 @@ def setUp(self):
     )
     @parametrize("dtype", [torch.bfloat16, torch.half])
     @parametrize("group_size", [32, 64, 128])
-    def test_linear(self, sizes, dtype, group_size):
-        device = self.device_type
+    @parametrize("thresholds", [{"xpu": 20, "npu": 10}])
+    def test_linear(self, device, sizes, dtype, group_size, thresholds):
         M, N, K = sizes
-        if device == "npu" and group_size == K:
+        if "npu" in device and group_size == K:
             pytest.skip(
                 f"{device} does not support group_size equal to K dimension ({group_size} == {K})"
             )
-        threshold = _THRESHOLD.get(device)
+        threshold = thresholds.get(device.split(":")[0])

         input = torch.randn(*M, K, dtype=dtype, device=device)
         linear = torch.nn.Linear(K, N, dtype=dtype, device=device)
@@ -75,16 +72,16 @@ def test_linear(self, sizes, dtype, group_size):
         quantized = linear(input)
         self.assertTrue(compute_error(original, quantized) > threshold)

-        if device == "xpu":
+        if "xpu" in device:
             compiled_linear = torch.compile(linear)
             quantized_and_compiled = compiled_linear(input)
             self.assertTrue(compute_error(original, quantized_and_compiled) > threshold)

     @parametrize("dtype", [torch.bfloat16, torch.half])
-    def test_module_path(self, dtype):
+    def test_module_path(self, device, dtype):
         device = self.device_type
         K, N, group_size = 128, 256, 128
-        if device == "npu":
+        if "npu" in device:
             group_size = 64

         linear = torch.nn.Linear(K, N, dtype=dtype, device=device)
@@ -104,14 +101,15 @@ def test_module_path(self, dtype):
         )

     @parametrize("dtype", [torch.float16, torch.bfloat16])
-    def test_activation_prescaling(self, dtype):
+    @parametrize("thresholds", [{"xpu": 20, "npu": 10}])
+    def test_activation_prescaling(self, device, dtype, thresholds):
         device = self.device_type
-        if device == "xpu" and dtype == torch.float16:
+        if "xpu" in device and dtype == torch.float16:
             pytest.skip(f"{device} test_activation_prescaling don't test {dtype}")

-        threshold = _THRESHOLD.get(device)
+        threshold = thresholds.get(device.split(":")[0])
         K, N, group_size = 128, 256, 128
-        if device == "npu":
+        if "npu" in device:
             group_size = 64

         input = torch.randn(1, K, dtype=dtype, device=device)
@@ -132,7 +130,7 @@ def test_activation_prescaling(self, dtype):


 instantiate_device_type_tests(
-    Int4PlainInt32Tensor, globals(), only_for=_ALL_DEVICES, allow_xpu=True
+    Int4PlainInt32Tensor, globals(), only_for=("xpu", "npu"), allow_xpu=True
 )

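A minimal standalone sketch (not part of the diff) of the device-key normalization the updated tests rely on: the `device` argument supplied by `instantiate_device_type_tests` may carry an index suffix (e.g. "xpu:0"), so the per-device threshold dict is looked up after stripping the suffix, as in `thresholds.get(device.split(":")[0])`. The helper name `threshold_for` below is illustrative, not from the repo.

```python
# Hypothetical helper mirroring the lookup pattern used in the tests above.
thresholds = {"xpu": 20, "npu": 10}

def threshold_for(device: str):
    # "xpu:0" -> "xpu"; "npu" -> "npu"
    return thresholds.get(device.split(":")[0])

assert threshold_for("xpu:0") == 20
assert threshold_for("npu") == 10
```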