Support perturbations_per_eval > 1 in cross-tensor FeatureAblation

sarahtranfb · facebook-github-bot · commit 31fa27b34b80 · 2025-03-18T18:50:57.000-07:00
Differential Revision: D71435704
diff --git a/captum/attr/_core/feature_ablation.py b/captum/attr/_core/feature_ablation.py
@@ -353,10 +353,12 @@ def attribute(
                     formatted_feature_mask,
                     attr_progress,
                     flattened_initial_eval,
+                    initial_eval,
                     n_outputs,
                     total_attrib,
                     weights,
                     attrib_type,
+                    perturbations_per_eval,
                     **kwargs,
                 )
             else:
@@ -470,10 +472,12 @@ def _attribute_with_cross_tensor_feature_masks(
         formatted_feature_mask: Tuple[Tensor, ...],
         attr_progress: Optional[Union[SimpleProgress[IterableType], tqdm]],
         flattened_initial_eval: Tensor,
+        initial_eval: Tensor,
         n_outputs: int,
         total_attrib: List[Tensor],
         weights: List[Tensor],
         attrib_type: dtype,
+        perturbations_per_eval: int,
         **kwargs: Any,
     ) -> Tuple[List[Tensor], List[Tensor]]:
         feature_idx_to_tensor_idx: Dict[int, List[int]] = {}
@@ -482,17 +486,66 @@ def _attribute_with_cross_tensor_feature_masks(
                 if feature_idx.item() not in feature_idx_to_tensor_idx:
                     feature_idx_to_tensor_idx[feature_idx.item()] = []
                 feature_idx_to_tensor_idx[feature_idx.item()].append(i)
+        all_feature_idxs = list(feature_idx_to_tensor_idx.keys())
+        additional_args_repeated: object
+        if perturbations_per_eval > 1:
+            additional_args_repeated = (
+                _expand_additional_forward_args(
+                    formatted_additional_forward_args, perturbations_per_eval
+                )
+                if formatted_additional_forward_args is not None
+                else None
+            )
+            target_repeated = _expand_target(target, perturbations_per_eval)
+        else:
+            additional_args_repeated = formatted_additional_forward_args
+            target_repeated = target
+        num_examples = formatted_inputs[0].shape[0]
+
+        current_additional_args: object
+        # Process up to perturbations_per_eval features at a time, not per input tensor
+        for i in range(0, len(all_feature_idxs), perturbations_per_eval):
+            current_feature_idxs = all_feature_idxs[i : i + perturbations_per_eval]
+            current_num_ablated_features = min(
+                perturbations_per_eval, len(current_feature_idxs)
+            )
+
+            # Store appropriate inputs and additional args based on batch size.
+            if current_num_ablated_features != perturbations_per_eval:
+                current_additional_args = (
+                    _expand_additional_forward_args(
+                        formatted_additional_forward_args, current_num_ablated_features
+                    )
+                    if formatted_additional_forward_args is not None
+                    else None
+                )
+                current_target = _expand_target(target, current_num_ablated_features)
+            else:
+                current_additional_args = additional_args_repeated
+                current_target = target_repeated
+
+            current_inputs = ()
+            current_masks = []
+            for (
+                single_perturb_input,
+                single_perturb_masks,
+            ) in self._ablation_generator(
+                formatted_inputs,
+                baselines,
+                formatted_feature_mask,
+                current_feature_idxs,
+                feature_idx_to_tensor_idx,
+                **kwargs,
+            ):
+                if len(current_inputs) == 0:
+                    current_inputs = single_perturb_input
+                else:
+                    current_inputs = tuple(
+                        torch.cat((current_inputs[j], single_perturb_input[j]), dim=0)
+                        for j in range(len(current_inputs))
+                    )
+                current_masks.append(list(single_perturb_masks))
 
-        for (
-            current_inputs,
-            current_mask,
-        ) in self._ablation_generator(
-            formatted_inputs,
-            baselines,
-            formatted_feature_mask,
-            feature_idx_to_tensor_idx,
-            **kwargs,
-        ):
             # modified_eval has (n_feature_perturbed * n_outputs) elements
             # shape:
             #   agg mode: (*initial_eval.shape)
@@ -501,8 +554,8 @@ def _attribute_with_cross_tensor_feature_masks(
             modified_eval = _run_forward(
                 self.forward_func,
                 current_inputs,
-                target,
-                formatted_additional_forward_args,
+                current_target,
+                current_additional_args,
             )
 
             if attr_progress is not None:
@@ -515,13 +568,16 @@ def _attribute_with_cross_tensor_feature_masks(
 
             total_attrib, weights = self._process_ablated_out_full(
                 modified_eval,
-                current_mask,
+                current_masks,
                 flattened_initial_eval,
-                formatted_inputs,
+                initial_eval,
+                current_inputs,
                 n_outputs,
+                num_examples,
                 total_attrib,
                 weights,
                 attrib_type,
+                perturbations_per_eval,
             )
         return total_attrib, weights
 
@@ -530,6 +586,7 @@ def _ablation_generator(
         inputs: Tuple[Tensor, ...],
         baselines: BaselineType,
         input_mask: Tuple[Tensor, ...],
+        feature_idxs: List[int],
         feature_idx_to_tensor_idx: Dict[int, List[int]],
         **kwargs: Any,
     ) -> Generator[
@@ -540,11 +597,8 @@ def _ablation_generator(
         None,
         None,
     ]:
-        if isinstance(baselines, torch.Tensor):
-            baselines = baselines.reshape((1,) + tuple(baselines.shape))
-
         # Process one feature per time, rather than processing every input tensor
-        for feature_idx in feature_idx_to_tensor_idx.keys():
+        for feature_idx in feature_idxs:
             ablated_inputs, current_masks = (
                 self._construct_ablated_input_across_tensors(
                     inputs,
@@ -784,7 +838,7 @@ def _attribute_progress_setup(
             formatted_inputs, feature_mask, **kwargs
         )
         total_forwards = (
-            int(sum(feature_counts))
+            math.ceil(int(sum(feature_counts)) / perturbations_per_eval)
             if enable_cross_tensor_attribution
             else sum(
                 math.ceil(count / perturbations_per_eval) for count in feature_counts
@@ -1187,43 +1241,93 @@ def _process_ablated_out(
             weights[i] += current_mask.float().sum(dim=0)
 
         total_attrib[i] += (eval_diff * current_mask.to(attrib_type)).sum(dim=0)
         return total_attrib, weights
 
     def _process_ablated_out_full(
         self,
         modified_eval: Tensor,
-        current_mask: Tuple[Optional[Tensor], ...],
+        current_mask: List[List[Optional[Tensor]]],
         flattened_initial_eval: Tensor,
+        initial_eval: Tensor,
         inputs: TensorOrTupleOfTensorsGeneric,
         n_outputs: int,
+        num_examples: int,
         total_attrib: List[Tensor],
         weights: List[Tensor],
         attrib_type: dtype,
+        perturbations_per_eval: int,
     ) -> Tuple[List[Tensor], List[Tensor]]:
+        """Fold one batched forward result into ``total_attrib`` and ``weights``.
+
+        ``modified_eval`` is parsed and reshaped to ``(-1, n_outputs)``. For each
+        perturbation ``j`` in the batch, row ``j`` of
+        ``flattened_initial_eval - modified_eval`` is multiplied by every non-None
+        mask in ``current_mask[j]`` and added to the matching ``total_attrib``
+        entry; inputs whose slice has no elements are skipped. If
+        ``self.use_weights`` is set, those masks are also added to ``weights``.
+
+        The perturbation count is ``inputs[0].shape[0] // num_examples``. While
+        ``self._is_output_shape_valid`` is false and ``perturbations_per_eval > 1``,
+        the output's first dim is asserted to equal that count times
+        ``initial_eval.shape[0]``.
+
+        Returns:
+            The ``total_attrib`` and ``weights`` lists, updated in place.
+        """
         modified_eval = self._parse_forward_out(modified_eval)
+        # if perturbations_per_eval > 1, the output shape must grow with
+        # input and not be aggregated
+        current_batch_size = inputs[0].shape[0]
+
+        # number of perturbations in this batch; smaller than
+        # perturbations_per_eval when too few features remain
+        n_perturb = current_batch_size // num_examples
+        if perturbations_per_eval > 1 and not self._is_output_shape_valid:
+
+            current_output_shape = modified_eval.shape
+
+            # use initial_eval as the forward of perturbations_per_eval = 1
+            initial_output_shape = initial_eval.shape
+
+            assert (
+                # check if the output is not a scalar
+                current_output_shape
+                and initial_output_shape
+                # check if the output grows in the same ratio, i.e., not agg
+                and current_output_shape[0] == n_perturb * initial_output_shape[0]
+            ), (
+                "When perturbations_per_eval > 1, forward_func's output "
+                "should be a tensor whose 1st dim grow with the input "
+                f"batch size: when input batch size is {num_examples}, "
+                f"the output shape is {initial_output_shape}; "
+                f"when input batch size is {current_batch_size}, "
+                f"the output shape is {current_output_shape}"
+            )
+
+            self._is_output_shape_valid = True
 
         # reshape the leading dim for n_feature_perturbed
         # flatten each feature's eval outputs into 1D of (n_outputs)
         modified_eval = modified_eval.reshape(-1, n_outputs)
         # eval_diff in shape (n_feature_perturbed, n_outputs)
         eval_diff = flattened_initial_eval - modified_eval
-        eval_diff_shape = eval_diff.shape
-
-        # append the shape of one input example
-        # to make it broadcastable to mask
 
-        if self.use_weights:
-            for weight, mask in zip(weights, current_mask):
-                if mask is not None:
-                    weight += mask.float().sum(dim=0)
-        for i, mask in enumerate(current_mask):
-            if mask is None or inputs[i].numel() == 0:
-                continue
-            eval_diff = eval_diff.reshape(
-                eval_diff_shape + (inputs[i].dim() - 1) * (1,)
-            )
-            eval_diff = eval_diff.to(total_attrib[i].device)
-            total_attrib[i] += (eval_diff * mask.to(attrib_type)).sum(dim=0)
+        for j in range(n_perturb):
+            single_perturb_mask = current_mask[j]
+            if self.use_weights:
+                for weight, mask in zip(weights, single_perturb_mask):
+                    if mask is not None:
+                        weight += mask.float()
+            for i, mask in enumerate(single_perturb_mask):
+                this_input = inputs[i][j * num_examples : (j + 1) * num_examples]
+                if mask is None or this_input.numel() == 0:
+                    continue
+                # append singleton dims so eval_diff_j broadcasts against the mask
+                eval_diff_j = eval_diff[j].reshape(
+                    eval_diff[j].shape + (this_input.dim() - 1) * (1,)
+                )
+                eval_diff_j = eval_diff_j.to(total_attrib[i].device)
+                total_attrib[i] += eval_diff_j * mask.to(attrib_type)
         return total_attrib, weights
 
     def _fut_tuple_to_accumulate_fut_list(
diff --git a/tests/attr/test_feature_ablation.py b/tests/attr/test_feature_ablation.py
@@ -164,6 +164,19 @@ def test_multi_sample_ablation_with_mask(self) -> None:
             perturbations_per_eval=(1, 2, 3),
         )
 
+    def test_multi_sample_ablation_with_mask_weighted(self) -> None:
+        ablation_algo = FeatureAblation(BasicModel_MultiLayer())
+        ablation_algo.use_weights = True
+        inp = torch.tensor([[2.0, 10.0, 3.0], [20.0, 50.0, 30.0]], requires_grad=True)
+        mask = torch.tensor([[0, 0, 1], [1, 1, 0]])
+        self._ablation_test_assert(
+            ablation_algo,
+            inp,
+            [[41.0, 41.0, 12.0], [280.0, 280.0, 120.0]],
+            feature_mask=mask,
+            perturbations_per_eval=(1, 2, 3),
+        )
+
     def test_multi_input_ablation_with_mask(self) -> None:
         ablation_algo = FeatureAblation(BasicModel_MultiLayer_MultiInput())
         inp1 = torch.tensor([[23.0, 100.0, 0.0], [20.0, 50.0, 30.0]])
@@ -207,6 +220,50 @@ def test_multi_input_ablation_with_mask(self) -> None:
             perturbations_per_eval=(1, 2, 3),
         )
 
+    def test_multi_input_ablation_with_mask_weighted(self) -> None:
+        ablation_algo = FeatureAblation(BasicModel_MultiLayer_MultiInput())
+        ablation_algo.use_weights = True
+        inp1 = torch.tensor([[23.0, 100.0, 0.0], [20.0, 50.0, 30.0]])
+        inp2 = torch.tensor([[20.0, 50.0, 30.0], [0.0, 100.0, 0.0]])
+        inp3 = torch.tensor([[0.0, 100.0, 10.0], [2.0, 10.0, 3.0]])
+        mask1 = torch.tensor([[1, 1, 1], [0, 1, 0]])
+        mask2 = torch.tensor([[3, 4, 2]])
+        mask3 = torch.tensor([[5, 6, 7], [5, 5, 5]])
+        expected = (
+            [[492.0, 492.0, 492.0], [200.0, 200.0, 200.0]],
+            [[80.0, 200.0, 120.0], [0.0, 400.0, 0.0]],
+            [[0.0, 400.0, 40.0], [60.0, 60.0, 60.0]],
+        )
+        self._ablation_test_assert(
+            ablation_algo,
+            (inp1, inp2, inp3),
+            expected,
+            additional_input=(1,),
+            feature_mask=(mask1, mask2, mask3),
+        )
+        self._ablation_test_assert(
+            ablation_algo,
+            (inp1, inp2),
+            expected[0:1],
+            additional_input=(inp3, 1),
+            feature_mask=(mask1, mask2),
+            perturbations_per_eval=(1, 2, 3),
+        )
+        expected_with_baseline = (
+            [[468.0, 468.0, 468.0], [184.0, 192.0, 184.0]],
+            [[68.0, 188.0, 108.0], [-12.0, 388.0, -12.0]],
+            [[-16.0, 384.0, 24.0], [12.0, 12.0, 12.0]],
+        )
+        self._ablation_test_assert(
+            ablation_algo,
+            (inp1, inp2, inp3),
+            expected_with_baseline,
+            additional_input=(1,),
+            feature_mask=(mask1, mask2, mask3),
+            baselines=(2, 3.0, 4),
+            perturbations_per_eval=(1, 2, 3),
+        )
+
     def test_multi_input_ablation_with_mask_dupe_feature_idx(self) -> None:
         ablation_algo = FeatureAblation(BasicModel_MultiLayer_MultiInput())
         inp1 = torch.tensor([[23.0, 100.0, 0.0], [20.0, 50.0, 30.0]])