Commit b89bedf

add more changes for XPU

1 parent: 88ed5d2

7 files changed: 30 additions & 9 deletions

test/distributed/fsdp/test_fsdp_comm.py
Lines changed: 1 addition & 1 deletion

@@ -382,7 +382,7 @@ def forward(self, x: torch.Tensor):
         model.module.mlps._wait_unshard_streams_on_current_stream()


-devices = ("cuda", "hpu")
+devices = ("cuda", "hpu", "xpu")
 instantiate_device_type_tests(TestCommunication, globals(), only_for=devices)
 instantiate_device_type_tests(TestExplicitUnshard, globals(), only_for=devices)
 if __name__ == "__main__":
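
For readers unfamiliar with the harness: instantiate_device_type_tests generates concrete per-device test classes from a generic class, and only_for limits which device types are considered, which is why adding "xpu" to these tuples is enough to pick the tests up on XPU. A self-contained toy illustration of the same pattern (hypothetical TestExample class, not part of this commit):

import torch
from torch.testing._internal.common_device_type import instantiate_device_type_tests
from torch.testing._internal.common_utils import run_tests, TestCase

class TestExample(TestCase):
    # Each generated variant (e.g. TestExampleCPU, TestExampleXPU) passes its
    # device string to the test method.
    def test_ones(self, device):
        t = torch.ones(2, 2, device=device)
        self.assertEqual(t.sum().item(), 4.0)

devices = ("cpu", "cuda", "hpu", "xpu")
instantiate_device_type_tests(TestExample, globals(), only_for=devices)

if __name__ == "__main__":
    run_tests()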

test/distributed/tensor/test_dtensor_compile.py
Lines changed: 9 additions & 1 deletion

@@ -43,6 +43,7 @@
     skipIfTorchDynamo,
     TEST_CUDA,
     TEST_HPU,
+    TEST_XPU,
 )
 from torch.testing._internal.distributed._tensor.common_dtensor import (
     DTensorTestBase,

@@ -108,7 +109,14 @@ def tearDown(self):

     @property
     def device_type(self) -> str:
-        return "cuda" if TEST_CUDA else "hpu" if TEST_HPU else "cpu"
+        if TEST_CUDA:
+            return "cuda"
+        elif TEST_HPU:
+            return "hpu"
+        elif TEST_XPU:
+            return "xpu"
+        else:
+            return "cpu"

     @property
     def world_size(self) -> int:
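
The TEST_CUDA / TEST_HPU / TEST_XPU flags consumed by this property come from torch.testing._internal.common_utils. A rough sketch of what they represent (the upstream definitions differ in detail):

import torch

# Rough sketch, not the upstream definitions: each flag reports whether the
# corresponding runtime sees at least one usable device of that type.
TEST_CUDA = torch.cuda.is_available()
TEST_XPU = torch.xpu.is_available()
# TEST_HPU additionally depends on the out-of-tree Intel Gaudi plugin
# (habana_frameworks) being loaded, so it is not sketched here.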

test/distributed/tensor/test_random_ops.py
Lines changed: 7 additions & 3 deletions

@@ -19,16 +19,20 @@
 )
 from torch.distributed.tensor.debug import CommDebugMode
 from torch.distributed.tensor.parallel import ColwiseParallel, parallelize_module
-from torch.testing._internal.common_utils import run_tests, TEST_HPU
+from torch.testing._internal.common_utils import run_tests, TEST_HPU, TEST_XPU
 from torch.testing._internal.distributed._tensor.common_dtensor import (
     DTensorTestBase,
     skip_if_lt_x_gpu,
     skip_unless_torch_gpu,
     with_comms,
 )

-
-TYPE_DEVICE = "hpu" if TEST_HPU else "cuda"
+if TEST_XPU:
+    TYPE_DEVICE = "xpu"
+elif TEST_HPU:
+    TYPE_DEVICE = "hpu"
+else:
+    TYPE_DEVICE = "cuda"


 class DistTensorRandomInitTest(DTensorTestBase):
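
The xpu/hpu/cuda priority chain above mirrors the one added to test_dtensor_compile.py. On builds that ship torch.accelerator, an equivalent device-agnostic spelling (an alternative sketch, not what this commit does) could look like:

import torch

def detect_device_type(default: str = "cuda") -> str:
    # Hypothetical helper: ask PyTorch which accelerator backend is present
    # (cuda, xpu, ...) and fall back to `default` when there is none.
    acc = getattr(torch, "accelerator", None)
    dev = acc.current_accelerator() if acc is not None else None
    return dev.type if dev is not None else default

TYPE_DEVICE = detect_device_type()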

test/distributed/tensor/test_redistribute.py
Lines changed: 2 additions & 2 deletions

@@ -9,7 +9,7 @@
 from torch.distributed.device_mesh import init_device_mesh
 from torch.distributed.tensor._collective_utils import shard_dim_alltoall
 from torch.distributed.tensor.debug import CommDebugMode
-from torch.testing._internal.common_utils import run_tests, TEST_CUDA, TEST_HPU
+from torch.testing._internal.common_utils import run_tests, TEST_CUDA, TEST_HPU, TEST_XPU
 from torch.testing._internal.distributed._tensor.common_dtensor import (
     DTensorTestBase,
     with_comms,

@@ -366,7 +366,7 @@ def test_redistribute_shard_dim_change(self):
         local_out_dt = out_dt.to_local()
         local_expected_dt = expected_dt.to_local()
         self.assertEqual(out_dt.to_local(), expected_dt.to_local())
-        if TEST_HPU or TEST_CUDA:
+        if TEST_HPU or TEST_CUDA or TEST_XPU:
             self.assertEqual(
                 comm_mode.get_comm_counts()[
                     torch.ops._dtensor.shard_dim_alltoall

test/distributed/test_backends.py
Lines changed: 1 addition & 1 deletion

@@ -44,7 +44,7 @@ def test_create_pg(self, device) -> None:
         dist.destroy_process_group()


-devices = ["cpu", "cuda", "hpu"]
+devices = ["cpu", "cuda", "hpu", "xpu"]
 instantiate_device_type_tests(TestMiscCollectiveUtils, globals(), only_for=devices)

 if __name__ == "__main__":

test/distributed/test_functional_api.py
Lines changed: 9 additions & 0 deletions

@@ -34,6 +34,7 @@
     skipIfHpu,
     TEST_CUDA,
     TEST_HPU,
+    TEST_XPU,
     TestCase,
 )


@@ -66,6 +67,9 @@
     DEVICE = "hpu"
 elif TEST_CUDA:
     devices.append("cuda")
+elif TEST_XPU:
+    devices.append("xpu")
+    DEVICE = "xpu"


 def new_subgroups(group_size: int, pg_tag=None):

@@ -474,6 +478,8 @@ def allred_mesh_dim(input):
 # And then set the BACKEND variable appropriately.
 if TEST_HPU:
     BACKEND = dist.Backend.HCCL
+elif TEST_XPU:
+    BACKEND = dist.Backend.XCCL


 # allows you to check for multiple accelerator irrespective of device type

@@ -486,6 +492,9 @@ def exit_if_lt_x_accelerators(x):
     elif TEST_HPU:
         if torch.hpu.device_count() < x:
             sys.exit(TEST_SKIPS[f"multi-hpu-{x}"].exit_code)
+    elif TEST_XPU:
+        if torch.xpu.device_count() < x:
+            sys.exit(TEST_SKIPS[f"multi-hpu-{x}"].exit_code)


 def with_comms(func=None):
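
dist.Backend.XCCL selected above is the XPU collective backend, parallel to HCCL for HPU and NCCL for CUDA. A minimal single-rank sketch of initializing a process group with it (assuming a build with XCCL support and at least one XPU device; the real tests spawn multiple ranks through the test harness):

import os
import torch
import torch.distributed as dist

# Single-rank sketch only; rank/world_size normally come from the launcher.
os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")

if torch.xpu.is_available():
    dist.init_process_group(backend="xccl", rank=0, world_size=1)
    t = torch.ones(4, device="xpu")
    dist.all_reduce(t)  # sum across the single-member group leaves t unchanged
    dist.destroy_process_group()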

torch/testing/_internal/common_fsdp.py
Lines changed: 1 addition & 1 deletion

@@ -1440,7 +1440,7 @@ def _test_fsdp_parity(
             self.assertRaisesRegex(
                 RuntimeError,
                 "An FSDP-managed module with parameter CPU offloading enabled "
-                "has parameters on cuda",
+                "has parameters on xpu", #zl_debug: refine for xpu
             )
             if expects_device_error
             else nullcontext()
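
Since this assertion now hardcodes a second device name, a device-agnostic variant would format the expected device into the regex instead of rewriting the literal per backend. A minimal standalone sketch (hypothetical helper, not part of common_fsdp.py):

def expected_offload_error(device_type: str) -> str:
    # Hypothetical helper: build the expected error message for whichever
    # accelerator the FSDP parity test targets, instead of hardcoding it.
    return (
        "An FSDP-managed module with parameter CPU offloading enabled "
        f"has parameters on {device_type}"
    )

assert "has parameters on xpu" in expected_offload_error("xpu")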
