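[release/2.6][SWDEV-523736] Fix review observations: gate gfx1201/gfx950 flash/efficient attention test support on TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL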

k-artem · k-artem · commit 8b56090de8a4 · 2025-07-01T10:02:22.000-04:00
diff --git a/test/dynamo/test_repros.py b/test/dynamo/test_repros.py
@@ -47,8 +47,6 @@
     parametrize,
     skipIfWindows,
     TEST_WITH_ROCM,
-    skipIfRocmArch,
-    NAVI44_ARCH,
 )
 from torch.testing._internal.two_tensor import TwoTensor
 
@@ -6410,7 +6408,7 @@ def fn(x):
         self.assertEqual(fn(inp), opt_fn(inp))
 
     @requires_cuda
-    @skipIfRocmArch(NAVI44_ARCH)
+    @unittest.skipIf(not PLATFORM_SUPPORTS_FLASH_ATTENTION, "Some archs don't support SDPA")
     def test_sdpa_dynamic_shapes(self):
         def f(x, s0, s1, s2):
             q = x.view(2, s0, s2, s0)
diff --git a/torch/testing/_internal/common_cuda.py b/torch/testing/_internal/common_cuda.py
@@ -43,19 +43,16 @@ def evaluate_gfx_arch_within(arch_list):
     effective_arch = os.environ.get('PYTORCH_DEBUG_FLASH_ATTENTION_GCN_ARCH_OVERRIDE', gcn_arch_name)
     # gcnArchName can be complicated strings like gfx90a:sramecc+:xnack-
     # Hence the matching should be done reversely
-    result = any(arch in effective_arch for arch in arch_list)
-
-    if result and gcn_arch_name == "gfx1201":
-        os.environ['TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL'] = '1'
-
-    return result
+    return any(arch in effective_arch for arch in arch_list)
 
 def CDNA2OrLater():
     return evaluate_gfx_arch_within(["gfx90a", "gfx942"])
 
 def evaluate_platform_supports_flash_attention():
     if TEST_WITH_ROCM:
-        arch_list = ["gfx90a", "gfx942", "gfx1100", "gfx1201"]
+        arch_list = ["gfx90a", "gfx942", "gfx1100"]
+        if os.environ.get("TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL", "0") != "0":
+            arch_list += ["gfx1201", "gfx950"]
         return evaluate_gfx_arch_within(arch_list)
     if TEST_CUDA:
         return not IS_WINDOWS and SM80OrLater
@@ -64,6 +61,8 @@ def evaluate_platform_supports_flash_attention():
 def evaluate_platform_supports_efficient_attention():
     if TEST_WITH_ROCM:
         arch_list = ["gfx90a", "gfx942", "gfx1100"]
+        if os.environ.get("TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL", "0") != "0":
+            arch_list += ["gfx1201", "gfx950"]
         return evaluate_gfx_arch_within(arch_list)
     if TEST_CUDA:
         return True
diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py
@@ -111,7 +111,6 @@
 NAVI_ARCH = ("gfx1030", "gfx1100", "gfx1101", "gfx1200", "gfx1201")
 NAVI3_ARCH = ("gfx1100", "gfx1101")
 NAVI4_ARCH = ("gfx1200", "gfx1201")
-NAVI44_ARCH = "gfx1200"
 
 def is_navi3_arch():
     if torch.cuda.is_available():