 import copy
 import os
 from enum import Enum
-from typing import Any, Callable, cast, Dict, Optional, Tuple, Type
+from typing import Any, Callable, cast, Dict, List, Optional, Tuple, Type

 import torch
 import torch.distributed as dist
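
(Aside: the os and torch.distributed imports support the dist_data_parallel mode handled further down; DistributedDataParallel can only be constructed once a process group is initialized. A single-process setup would presumably look like the sketch below. The rendezvous address, port, and backend here are assumptions for illustration, not taken from this commit.)

import os

import torch.distributed as dist

# Assumed env:// rendezvous for a one-process group; values are illustrative.
os.environ["MASTER_ADDR"] = "127.0.0.1"
os.environ["MASTER_PORT"] = "29500"
dist.init_process_group(backend="gloo", rank=0, world_size=1)
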
@@ -136,91 +136,22 @@ def data_parallel_test_assert(self) -> None:
                 else:
                     cuda_args[key] = args[key]

-            alt_device_ids = None
             cuda_model = copy.deepcopy(model).cuda()
-            # Initialize models based on DataParallelCompareMode
-            if mode is DataParallelCompareMode.cpu_cuda:
-                model_1, model_2 = model, cuda_model
-                args_1, args_2 = args, cuda_args
-            elif mode is DataParallelCompareMode.data_parallel_default:
-                model_1, model_2 = (
-                    cuda_model,
-                    torch.nn.parallel.DataParallel(cuda_model),
-                )
-                args_1, args_2 = cuda_args, cuda_args
-            elif mode is DataParallelCompareMode.data_parallel_alt_dev_ids:
-                alt_device_ids = [0] + [
-                    x for x in range(torch.cuda.device_count() - 1, 0, -1)
-                ]
-                model_1, model_2 = (
-                    cuda_model,
-                    torch.nn.parallel.DataParallel(
-                        cuda_model, device_ids=alt_device_ids
-                    ),
-                )
-                args_1, args_2 = cuda_args, cuda_args
-            elif mode is DataParallelCompareMode.dist_data_parallel:
-
-                model_1, model_2 = (
-                    cuda_model,
-                    torch.nn.parallel.DistributedDataParallel(
-                        cuda_model, device_ids=[0], output_device=0
-                    ),
-                )
-                args_1, args_2 = cuda_args, cuda_args
-            else:
-                raise AssertionError("DataParallel compare mode type is not valid.")
-
-            attr_method_1: Attribution
-            attr_method_2: Attribution
-            if target_layer:
-                internal_algorithm = cast(Type[InternalAttribution], algorithm)
-                attr_method_1 = internal_algorithm(
-                    model_1, get_target_layer(model_1, target_layer)
-                )
-                # cuda_model is used to obtain target_layer since DataParallel
-                # adds an additional wrapper.
-                # model_2 is always either the CUDA model itself or DataParallel.
-                if alt_device_ids is None:
-                    attr_method_2 = internal_algorithm(
-                        model_2, get_target_layer(cuda_model, target_layer)
-                    )
-                else:
-                    # LayerDeepLift and LayerDeepLiftShap do not take device ids
-                    # as a parameter, since they must always have the DataParallel
-                    # model object directly.
-                    # Some neuron methods and GuidedGradCAM also require the
-                    # model and cannot take a forward function.
-                    if issubclass(
-                        internal_algorithm,
-                        (
-                            LayerDeepLift,
-                            LayerDeepLiftShap,
-                            LayerLRP,
-                            NeuronDeepLift,
-                            NeuronDeepLiftShap,
-                            NeuronDeconvolution,
-                            NeuronGuidedBackprop,
-                            GuidedGradCam,
-                        ),
-                    ):
-                        attr_method_2 = internal_algorithm(
-                            model_2,
-                            get_target_layer(cuda_model, target_layer),  # type: ignore
-                        )
-                    else:
-                        attr_method_2 = internal_algorithm(
-                            model_2.forward,
-                            get_target_layer(cuda_model, target_layer),
-                            device_ids=alt_device_ids,
-                        )
-            else:
-                attr_method_1 = algorithm(model_1)
-                attr_method_2 = algorithm(model_2)
+            # Set up test arguments based on DataParallelCompareMode
+            model_1, model_2, args_1, args_2, alt_device_ids = _get_dp_test_args(
+                cuda_model, model, cuda_args, args, mode
+            )
-            if noise_tunnel:
-                attr_method_1 = NoiseTunnel(attr_method_1)
-                attr_method_2 = NoiseTunnel(attr_method_2)
+            # Construct attribution methods
+            attr_method_1, attr_method_2 = _get_dp_attr_methods(
+                algorithm,
+                target_layer,
+                model_1,
+                model_2,
+                cuda_model,
+                alt_device_ids,
+                noise_tunnel,
+            )
             if attr_method_1.has_convergence_delta():
                 attributions_1, delta_1 = attr_method_1.attribute(
                     return_convergence_delta=True, **args_1
@@ -266,6 +197,105 @@ def data_parallel_test_assert(self) -> None:
         return data_parallel_test_assert


+def _get_dp_test_args(
+    cuda_model: Module,
+    model: Module,
+    cuda_args: Dict[str, Any],
+    args: Dict[str, Any],
+    mode: DataParallelCompareMode,
+) -> Tuple[Module, Module, Dict[str, Any], Dict[str, Any], Optional[List[int]]]:
+    # Initialize models based on DataParallelCompareMode
+    alt_device_ids = None
+    if mode is DataParallelCompareMode.cpu_cuda:
+        model_1, model_2 = model, cuda_model
+        args_1, args_2 = args, cuda_args
+    elif mode is DataParallelCompareMode.data_parallel_default:
+        model_1, model_2 = (
+            cuda_model,
+            torch.nn.parallel.DataParallel(cuda_model),
+        )
+        args_1, args_2 = cuda_args, cuda_args
+    elif mode is DataParallelCompareMode.data_parallel_alt_dev_ids:
+        alt_device_ids = [0] + list(range(torch.cuda.device_count() - 1, 0, -1))
+        model_1, model_2 = (
+            cuda_model,
+            torch.nn.parallel.DataParallel(cuda_model, device_ids=alt_device_ids),
+        )
+        args_1, args_2 = cuda_args, cuda_args
+    elif mode is DataParallelCompareMode.dist_data_parallel:
+
+        model_1, model_2 = (
+            cuda_model,
+            torch.nn.parallel.DistributedDataParallel(
+                cuda_model, device_ids=[0], output_device=0
+            ),
+        )
+        args_1, args_2 = cuda_args, cuda_args
+    else:
+        raise AssertionError("DataParallel compare mode type is not valid.")
+
+    return model_1, model_2, args_1, args_2, alt_device_ids
+
+
+def _get_dp_attr_methods(
+    algorithm: Type[Attribution],
+    target_layer: Optional[str],
+    model_1: Module,
+    model_2: Module,
+    cuda_model: Module,
+    alt_device_ids: Optional[List[int]],
+    noise_tunnel: bool,
+) -> Tuple[Attribution, Attribution]:
+    if target_layer:
+        internal_algorithm = cast(Type[InternalAttribution], algorithm)
+        attr_method_1 = internal_algorithm(
+            model_1, get_target_layer(model_1, target_layer)
+        )
+        # cuda_model is used to obtain target_layer since DataParallel
+        # adds an additional wrapper.
+        # model_2 is always either the CUDA model itself or DataParallel.
+        if alt_device_ids is None:
+            attr_method_2 = internal_algorithm(
+                model_2, get_target_layer(cuda_model, target_layer)
+            )
+        else:
+            # LayerDeepLift and LayerDeepLiftShap do not take device ids
+            # as a parameter, since they must always have the DataParallel
+            # model object directly.
+            # Some neuron methods and GuidedGradCAM also require the
+            # model and cannot take a forward function.
+            if issubclass(
+                internal_algorithm,
+                (
+                    LayerDeepLift,
+                    LayerDeepLiftShap,
+                    LayerLRP,
+                    NeuronDeepLift,
+                    NeuronDeepLiftShap,
+                    NeuronDeconvolution,
+                    NeuronGuidedBackprop,
+                    GuidedGradCam,
+                ),
+            ):
+                attr_method_2 = internal_algorithm(
+                    model_2,
+                    get_target_layer(cuda_model, target_layer),  # type: ignore
+                )
+            else:
+                attr_method_2 = internal_algorithm(
+                    model_2.forward,
+                    get_target_layer(cuda_model, target_layer),
+                    device_ids=alt_device_ids,
+                )
+    else:
+        attr_method_1 = algorithm(model_1)
+        attr_method_2 = algorithm(model_2)
+    if noise_tunnel:
+        attr_method_1 = NoiseTunnel(attr_method_1)
+        attr_method_2 = NoiseTunnel(attr_method_2)
+    return attr_method_1, attr_method_2
+
+
 if torch.cuda.is_available() and torch.cuda.device_count() != 0:

     class DataParallelTest(BaseTest, metaclass=DataParallelMeta):
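
For reference, the four branches in _get_dp_test_args imply a DataParallelCompareMode enum along the following lines. This is a minimal sketch reconstructed from the dispatch above: only the member names come from this diff; the values and ordering are assumptions.

from enum import Enum


class DataParallelCompareMode(Enum):
    # Member names taken from the branches in _get_dp_test_args; values assumed.
    cpu_cuda = 1
    data_parallel_default = 2
    data_parallel_alt_dev_ids = 3
    dist_data_parallel = 4

Each mode pairs a reference model (model_1) with a comparison model (model_2) and the test asserts that both produce matching attributions.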
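
The data_parallel_alt_dev_ids ordering is easiest to see with a concrete device count. A worked example, assuming four visible GPUs (the count is illustrative):

# With torch.cuda.device_count() == 4, device 0 stays first (DataParallel
# gathers outputs on device_ids[0] by default) and the remaining ids are
# reversed.
device_count = 4  # assumed for illustration
alt_device_ids = [0] + list(range(device_count - 1, 0, -1))
assert alt_device_ids == [0, 3, 2, 1]

This exercises DataParallel with a non-default device ordering while still collecting results on device 0.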
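
The comment about DataParallel adding a wrapper refers to the extra .module level on the wrapped network, which is why get_target_layer is called on cuda_model rather than on model_2. A minimal sketch of that behavior (the toy Sequential model is an assumption for illustration):

import torch

net = torch.nn.Sequential(torch.nn.Linear(4, 2))
dp = torch.nn.DataParallel(net)

# DataParallel keeps the original network one level down, so submodule
# names on the wrapper gain a "module." prefix:
assert dp.module is net
assert "0" in dict(net.named_modules())
assert "module.0" in dict(dp.named_modules())

Resolving the layer on the unwrapped cuda_model keeps the dotted-name lookup identical across all compare modes.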