
Fix require grad warning for non-leaf tensor in noise tunnel #426


Closed
wants to merge 4 commits into from
22 changes: 15 additions & 7 deletions captum/_utils/gradient.py
@@ -135,13 +135,15 @@ def _forward_layer_eval(
additional_forward_args: Any = None,
device_ids: Union[None, List[int]] = None,
attribute_to_layer_input: bool = False,
grad_enabled: bool = False,
) -> Tuple[Tuple[Tensor, ...], Literal[True, False]]:
return _forward_layer_eval_with_neuron_grads(
forward_fn,
inputs,
layer,
additional_forward_args=additional_forward_args,
gradient_neuron_index=None,
grad_enabled=grad_enabled,
device_ids=device_ids,
attribute_to_layer_input=attribute_to_layer_input,
)
@@ -311,6 +313,7 @@ def _forward_layer_eval_with_neuron_grads(
additional_forward_args: Any = None,
*,
gradient_neuron_index: Union[int, Tuple[int, ...]],
grad_enabled: bool = False,
device_ids: Union[None, List[int]] = None,
attribute_to_layer_input: bool = False,
) -> Tuple[Tuple[Tensor, ...], Tuple[Tensor, ...], Literal[True, False]]:
@@ -324,6 +327,7 @@ def _forward_layer_eval_with_neuron_grads(
layer: Module,
additional_forward_args: Any = None,
gradient_neuron_index: None = None,
grad_enabled: bool = False,
device_ids: Union[None, List[int]] = None,
attribute_to_layer_input: bool = False,
) -> Tuple[Tuple[Tensor, ...], Literal[True, False]]:
@@ -336,6 +340,7 @@ def _forward_layer_eval_with_neuron_grads(
layer: Module,
additional_forward_args: Any = None,
gradient_neuron_index: Union[None, int, Tuple[int, ...]] = None,
grad_enabled: bool = False,
device_ids: Union[None, List[int]] = None,
attribute_to_layer_input: bool = False,
) -> Union[
@@ -357,13 +362,16 @@ def _forward_layer_eval_with_neuron_grads(
evals in a dictionary protected by a lock, analogous to the gather implementation
for the core PyTorch DataParallel implementation.
"""
saved_layer, is_layer_tuple = _forward_layer_distributed_eval(
forward_fn,
inputs,
layer,
additional_forward_args=additional_forward_args,
attribute_to_layer_input=attribute_to_layer_input,
)
grad_enabled = True if gradient_neuron_index is not None or grad_enabled else False

with torch.autograd.set_grad_enabled(grad_enabled):
Contributor Author: Another option is to leave set_grad_enabled(True) here.

Contributor: This looks good! We don't need to enable grad if it's unnecessary.

saved_layer, is_layer_tuple = _forward_layer_distributed_eval(
forward_fn,
inputs,
layer,
additional_forward_args=additional_forward_args,
attribute_to_layer_input=attribute_to_layer_input,
)
device_ids = _extract_device_ids(forward_fn, saved_layer, device_ids)
# Identifies correct device ordering based on device ids.
# key_list is a list of devices in appropriate ordering for concatenation.
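To illustrate the grad_enabled gating discussed in the review thread above, here is a minimal standalone sketch in plain PyTorch (not Captum's actual helper; the function and variable names are made up). It shows how torch.autograd.set_grad_enabled keeps the forward pass free of autograd bookkeeping unless the caller explicitly asks for gradients:

```python
import torch
import torch.nn as nn

def eval_layer(model: nn.Module, x: torch.Tensor, grad_enabled: bool = False) -> torch.Tensor:
    # Autograd bookkeeping is only turned on when the caller asks for it,
    # mirroring the grad_enabled flag added in this PR.
    with torch.autograd.set_grad_enabled(grad_enabled):
        return model(x)

model = nn.Linear(4, 2)
x = torch.randn(3, 4, requires_grad=True)

out = eval_layer(model, x)                     # default: no graph is built
print(out.requires_grad, out.grad_fn)          # False None

out = eval_layer(model, x, grad_enabled=True)  # graph is recorded
out.sum().backward()                           # gradients reach x
print(x.grad.shape)                            # torch.Size([3, 4])
```

With the default grad_enabled=False, a layer evaluation used purely for activations skips graph construction; passing grad_enabled=True (or a gradient_neuron_index, per the condition in the diff) turns it back on.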
4 changes: 2 additions & 2 deletions captum/attr/_core/gradient_shap.py
@@ -369,9 +369,9 @@ def _scale_input(
inp_shape = (bsz,) + tuple([1] * len(inp_shape_wo_bsz))

# expand and reshape the indices
rand_coefficient = rand_coefficient.view(inp_shape).requires_grad_()
rand_coefficient = rand_coefficient.view(inp_shape)

input_baseline_scaled = (
rand_coefficient * input + (torch.tensor(1) - rand_coefficient) * baseline
)
).requires_grad_()
return input_baseline_scaled
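The change above moves requires_grad_() off the reshaped coefficient and onto the freshly built scaled input. A minimal standalone illustration of the leaf vs. non-leaf distinction behind this (plain PyTorch; the variable names are illustrative, not Captum code):

```python
import torch

inp = torch.randn(4, 3)
baseline = torch.zeros(4, 3)

# Old pattern: marking the coefficient makes everything computed from it
# a non-leaf tensor (it carries a grad_fn).
coeff = torch.rand(4, 1).requires_grad_()
scaled_old = coeff * inp + (1 - coeff) * baseline
print(scaled_old.is_leaf, scaled_old.grad_fn is None)   # False False

# New pattern: build the scaled input first, then mark it. It has no
# grad_fn, so it is a leaf and requires_grad_() can be applied safely.
coeff = torch.rand(4, 1)
scaled_new = (coeff * inp + (1 - coeff) * baseline).requires_grad_()
print(scaled_new.is_leaf, scaled_new.requires_grad)     # True True
```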
144 changes: 76 additions & 68 deletions captum/attr/_core/noise_tunnel.py
@@ -16,7 +16,7 @@
_format_tensor_into_tuples,
_is_tuple,
)
from .._utils.attribution import Attribution
from .._utils.attribution import Attribution, GradientAttribution
from .._utils.common import _validate_noise_tunnel_type


@@ -63,7 +63,9 @@ def __init__(self, attribution_method: Attribution) -> None:
"""
self.attribution_method = attribution_method
self.is_delta_supported = self.attribution_method.has_convergence_delta()

self.is_gradient_method = isinstance(
self.attribution_method, GradientAttribution
)
Attribution.__init__(self, self.attribution_method.forward_func)

@log_usage()
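The new is_gradient_method flag is just an isinstance check on the wrapped method. A small sketch of what it distinguishes, assuming the usual Captum class hierarchy (IntegratedGradients is gradient-based, Occlusion is perturbation-based):

```python
import torch.nn as nn
from captum.attr import IntegratedGradients, Occlusion
from captum.attr._utils.attribution import GradientAttribution

model = nn.Sequential(nn.Linear(4, 4), nn.ReLU(), nn.Linear(4, 1))

print(isinstance(IntegratedGradients(model), GradientAttribution))  # True
print(isinstance(Occlusion(model), GradientAttribution))            # False
```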
@@ -165,7 +167,9 @@ def add_noise_to_inputs() -> Tuple[Tensor, ...]:
), "stdevs must be type float. " "Given: {}".format(type(stdevs))
stdevs_ = (stdevs,) * len(inputs)
return tuple(
add_noise_to_input(input, stdev)
add_noise_to_input(input, stdev).requires_grad_()
if self.is_gradient_method
else add_noise_to_input(input, stdev)
for (input, stdev) in zip(inputs, stdevs_)
)
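A rough standalone sketch of what the conditional requires_grad_() above amounts to. The helper below is hypothetical, not NoiseTunnel's actual add_noise_to_input, and assumes the noisy copies are stacked along the batch dimension:

```python
import torch

def noisy_copies(inp: torch.Tensor, stdev: float, n_samples: int, needs_grad: bool) -> torch.Tensor:
    # Repeat the input n_samples times along the batch dimension and
    # perturb each copy with N(0, stdev^2) noise.
    expanded = inp.repeat_interleave(n_samples, dim=0)
    noisy = expanded + stdev * torch.randn_like(expanded)
    # Only gradient-based attribution methods need the noisy copies to be
    # differentiable; perturbation-based methods leave them untouched.
    return noisy.requires_grad_() if needs_grad else noisy

x = torch.randn(2, 3)
samples = noisy_copies(x, stdev=0.1, n_samples=4, needs_grad=True)
print(samples.shape, samples.is_leaf, samples.requires_grad)  # torch.Size([8, 3]) True True
```

Because the noisy tensors are constructed fresh (no grad_fn), they are leaves, so marking them with requires_grad_() here avoids the non-leaf warning this PR targets.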

@@ -199,81 +203,85 @@ def compute_expected_attribution_and_sq(attribution):
expected_attribution_sq = torch.mean(attribution ** 2, dim=1, keepdim=False)
return expected_attribution, expected_attribution_sq

# Keeps track whether original input is a tuple or not before
# converting it into a tuple.
is_inputs_tuple = isinstance(inputs, tuple)

inputs = _format_input(inputs)

_validate_noise_tunnel_type(nt_type, SUPPORTED_NOISE_TUNNEL_TYPES)

delta = None
inputs_with_noise = add_noise_to_inputs()
# if the algorithm supports targets, baselines and/or additional_forward_args
# they will be expanded based on the n_steps and corresponding kwargs
# variables will be updated accordingly
_expand_and_update_additional_forward_args(n_samples, kwargs)
_expand_and_update_target(n_samples, kwargs)
_expand_and_update_baselines(
inputs,
n_samples,
kwargs,
draw_baseline_from_distrib=draw_baseline_from_distrib,
)
with torch.no_grad():
# Keeps track whether original input is a tuple or not before
# converting it into a tuple.
is_inputs_tuple = isinstance(inputs, tuple)

inputs = _format_input(inputs)

_validate_noise_tunnel_type(nt_type, SUPPORTED_NOISE_TUNNEL_TYPES)

delta = None
inputs_with_noise = add_noise_to_inputs()
# if the algorithm supports targets, baselines and/or
# additional_forward_args they will be expanded based
# on the n_steps and corresponding kwargs
# variables will be updated accordingly
_expand_and_update_additional_forward_args(n_samples, kwargs)
_expand_and_update_target(n_samples, kwargs)
_expand_and_update_baselines(
inputs,
n_samples,
kwargs,
draw_baseline_from_distrib=draw_baseline_from_distrib,
)

# smoothgrad_Attr(x) = 1 / n * sum(Attr(x + N(0, sigma^2))
# NOTE: using __wrapped__ such that it does not log the inner logs
attributions = self.attribution_method.attribute.__wrapped__( # type: ignore
self.attribution_method, # self
inputs_with_noise if is_inputs_tuple else inputs_with_noise[0],
**kwargs,
)
# smoothgrad_Attr(x) = 1 / n * sum(Attr(x + N(0, sigma^2))
# NOTE: using __wrapped__ such that it does not log the inner logs
attr_func = self.attribution_method.attribute
attributions = attr_func.__wrapped__( # type: ignore
self.attribution_method, # self
inputs_with_noise if is_inputs_tuple else inputs_with_noise[0],
**kwargs,
)

return_convergence_delta = (
"return_convergence_delta" in kwargs and kwargs["return_convergence_delta"]
)
return_convergence_delta = (
"return_convergence_delta" in kwargs
and kwargs["return_convergence_delta"]
)

if self.is_delta_supported and return_convergence_delta:
attributions, delta = attributions
if self.is_delta_supported and return_convergence_delta:
attributions, delta = attributions

is_attrib_tuple = _is_tuple(attributions)
attributions = _format_tensor_into_tuples(attributions)
is_attrib_tuple = _is_tuple(attributions)
attributions = _format_tensor_into_tuples(attributions)

expected_attributions = []
expected_attributions_sq = []
for attribution in attributions:
expected_attr, expected_attr_sq = compute_expected_attribution_and_sq(
attribution
)
expected_attributions.append(expected_attr)
expected_attributions_sq.append(expected_attr_sq)
expected_attributions = []
expected_attributions_sq = []
for attribution in attributions:
expected_attr, expected_attr_sq = compute_expected_attribution_and_sq(
attribution
)
expected_attributions.append(expected_attr)
expected_attributions_sq.append(expected_attr_sq)

if NoiseTunnelType[nt_type] == NoiseTunnelType.smoothgrad:
return self._apply_checks_and_return_attributions(
tuple(expected_attributions),
is_attrib_tuple,
return_convergence_delta,
delta,
)

if NoiseTunnelType[nt_type] == NoiseTunnelType.smoothgrad:
return self._apply_checks_and_return_attributions(
tuple(expected_attributions),
is_attrib_tuple,
return_convergence_delta,
delta,
)
if NoiseTunnelType[nt_type] == NoiseTunnelType.smoothgrad_sq:
return self._apply_checks_and_return_attributions(
tuple(expected_attributions_sq),
is_attrib_tuple,
return_convergence_delta,
delta,
)

if NoiseTunnelType[nt_type] == NoiseTunnelType.smoothgrad_sq:
return self._apply_checks_and_return_attributions(
tuple(expected_attributions_sq),
is_attrib_tuple,
return_convergence_delta,
delta,
vargrad = tuple(
expected_attribution_sq - expected_attribution * expected_attribution
for expected_attribution, expected_attribution_sq in zip(
expected_attributions, expected_attributions_sq
)
)

vargrad = tuple(
expected_attribution_sq - expected_attribution * expected_attribution
for expected_attribution, expected_attribution_sq in zip(
expected_attributions, expected_attributions_sq
return self._apply_checks_and_return_attributions(
vargrad, is_attrib_tuple, return_convergence_delta, delta
)
)

return self._apply_checks_and_return_attributions(
vargrad, is_attrib_tuple, return_convergence_delta, delta
)

def _apply_checks_and_return_attributions(
self,
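For reference, the three reductions returned above (smoothgrad, smoothgrad_sq, vargrad) boil down to a mean, a mean of squares, and their difference. A minimal sketch, assuming the per-sample attributions have already been reshaped to (batch, n_samples, *dims) as expected by compute_expected_attribution_and_sq:

```python
import torch

# attr: attributions of each noisy copy, shaped (batch, n_samples, *dims)
attr = torch.randn(2, 5, 3)

smoothgrad = attr.mean(dim=1)                 # E[Attr]
smoothgrad_sq = (attr ** 2).mean(dim=1)       # E[Attr^2]
vargrad = smoothgrad_sq - smoothgrad ** 2     # E[Attr^2] - E[Attr]^2

print(smoothgrad.shape, smoothgrad_sq.shape, vargrad.shape)  # all torch.Size([2, 3])
```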
6 changes: 3 additions & 3 deletions tests/attr/neuron/test_neuron_gradient.py
@@ -126,9 +126,9 @@ def _gradient_matching_test_assert(
while len(neuron) < len(out.shape) - 1:
neuron = neuron + (0,)
input_attrib = Saliency(
lambda x: _forward_layer_eval(model, x, output_layer)[0][0][
(slice(None), *neuron)
]
lambda x: _forward_layer_eval(
model, x, output_layer, grad_enabled=True
)[0][0][(slice(None), *neuron)]
)
sal_vals = input_attrib.attribute(test_input, abs=False)
grad_vals = gradient_attrib.attribute(test_input, neuron)