diff --git a/captum/attr/_core/dataloader_attr.py b/captum/attr/_core/dataloader_attr.py index 60b1e4377d..f810b9645b 100644 --- a/captum/attr/_core/dataloader_attr.py +++ b/captum/attr/_core/dataloader_attr.py @@ -3,7 +3,7 @@ # pyre-strict from collections import defaultdict from copy import copy -from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union +from typing import Any, Callable, cast, Dict, Iterable, List, Optional, Tuple, Union import torch from captum._utils.common import ( @@ -193,8 +193,7 @@ def _forward_with_dataloader( feature_mask: Tuple[Tensor, ...], # pyre-fixme[24]: Generic type `Callable` expects 2 type parameters. reduce: Callable, - # pyre-fixme[24]: Generic type `Callable` expects 2 type parameters. - to_metric: Optional[Callable], + to_metric: Optional[Callable[[Tensor], Tensor]], show_progress: bool, feature_idx_to_mask_idx: Dict[int, List[int]], ) -> Tensor: @@ -243,7 +242,8 @@ def _forward_with_dataloader( accum_states[i] = reduce(accum_states[i], output, perturbed_inputs) - accum_results = [ + accum_states = cast(List[Tensor], accum_states) + accum_results: List[Tensor] = [ to_metric(accum) if to_metric else accum for accum in accum_states ] @@ -276,7 +276,7 @@ def attribute( Args: dataloader (torch.Dataloader): the dataloader to attribute, which should - return a tuple of consistant size for every iteration + return a tuple of consistent size for every iteration input_roles (tuple[int, ...], optional): a tuple of integers to define the role of each element returned from the dataloader. It should have the same size as the return of the dataloader. @@ -326,7 +326,7 @@ def attribute( traverses needed is ceil(n_perturbations / perturbations_per_pass). - This arguement offers control of the trade-off between memory + This argument offers control of the trade-off between memory and efficiency. If the dataloader involves slow operations like remote request or file I/O, multiple traversals can be inefficient. On the other hand, each perturbation needs to diff --git a/captum/attr/_core/lime.py b/captum/attr/_core/lime.py index f579a531dc..dc8447d1b3 100644 --- a/captum/attr/_core/lime.py +++ b/captum/attr/_core/lime.py @@ -522,7 +522,10 @@ def attribute( if show_progress: attr_progress.close() - combined_interp_inps = torch.cat(interpretable_inps).float() + # Argument 1 to "cat" has incompatible type + # "list[Tensor | tuple[Tensor, ...]]"; + # expected "tuple[Tensor, ...] 
| list[Tensor]" [arg-type] + combined_interp_inps = torch.cat(interpretable_inps).float() # type: ignore combined_outputs = ( torch.cat(outputs) if len(outputs[0].shape) > 0 diff --git a/captum/concept/_utils/classifier.py b/captum/concept/_utils/classifier.py index c9e7fc4022..477fa0c255 100644 --- a/captum/concept/_utils/classifier.py +++ b/captum/concept/_utils/classifier.py @@ -186,7 +186,9 @@ def train_and_eval( x_train, x_test, y_train, y_test = _train_test_split( torch.cat(inputs), torch.cat(labels), test_split=test_split_ratio ) - self.lm.device = device + # error: Incompatible types in assignment (expression has type "str | Any", + # variable has type "Tensor | Module") [assignment] + self.lm.device = device # type: ignore self.lm.fit(DataLoader(TensorDataset(x_train, y_train))) predict = self.lm(x_test) diff --git a/captum/log/__init__.py b/captum/log/__init__.py index 82e851c14e..d70dea94fe 100644 --- a/captum/log/__init__.py +++ b/captum/log/__init__.py @@ -24,7 +24,7 @@ except ImportError: from functools import wraps - def log(*args: Any, **kwargs: Any) -> None: + def log(*args: Any, **kwargs: Any) -> None: # type: ignore pass # bug with mypy: https://github.com/python/mypy/issues/1153 @@ -56,12 +56,12 @@ def wrapper(*args: Any, **kwargs: Any): return _log_usage # pyre-fixme[2]: Parameter must be annotated. - def set_environment(env) -> None: + def set_environment(env) -> None: # type: ignore pass def disable_detailed_logging() -> None: pass # pyre-fixme[2]: Parameter must be annotated. - def patch_methods(tester, patch_log: bool = True) -> None: + def patch_methods(tester, patch_log: bool = True) -> None: # type: ignore pass diff --git a/captum/module/gaussian_stochastic_gates.py b/captum/module/gaussian_stochastic_gates.py index 18bffe732d..58650fd5a6 100644 --- a/captum/module/gaussian_stochastic_gates.py +++ b/captum/module/gaussian_stochastic_gates.py @@ -81,7 +81,7 @@ def __init__( mask=mask, # pyre-fixme[6]: For 3rd argument expected `float` but got # `Optional[float]`. - reg_weight=reg_weight, + reg_weight=reg_weight, # type: ignore reg_reduction=reg_reduction, ) @@ -91,7 +91,7 @@ def __init__( # pyre-fixme[58]: `<` is not supported for operand types `int` and # `Optional[float]`. - assert 0 < std, f"the standard deviation should be positive, received {std}" + assert 0 < std, f"the standard deviation should be positive, received {std}" # type: ignore # noqa: E501 line too long self.std = std def _sample_gate_values(self, batch_size: int) -> Tensor: @@ -109,7 +109,7 @@ def _sample_gate_values(self, batch_size: int) -> Tensor: n = torch.empty(batch_size, self.n_gates, device=self.mu.device) # pyre-fixme[6]: For 2nd argument expected `float` but got # `Optional[float]`. 
- n.normal_(mean=0, std=self.std) + n.normal_(mean=0, std=self.std) # type: ignore return self.mu + n return self.mu.expand(batch_size, self.n_gates) diff --git a/tests/attr/helpers/gen_test_utils.py b/tests/attr/helpers/gen_test_utils.py index 5dc0f7f22b..4ac1dd5909 100644 --- a/tests/attr/helpers/gen_test_utils.py +++ b/tests/attr/helpers/gen_test_utils.py @@ -41,7 +41,7 @@ def parse_test_config( baseline_distr = ( test_config["baseline_distr"] if "baseline_distr" in test_config else False ) - return algorithms, model, args, layer, noise_tunnel, baseline_distr + return algorithms, model, args, layer, noise_tunnel, baseline_distr # type: ignore def should_create_generated_test(algorithm: Type[Attribution]) -> bool: diff --git a/tests/attr/layer/test_layer_gradient_shap.py b/tests/attr/layer/test_layer_gradient_shap.py index 045e3da77b..b50bac751f 100644 --- a/tests/attr/layer/test_layer_gradient_shap.py +++ b/tests/attr/layer/test_layer_gradient_shap.py @@ -201,7 +201,7 @@ def _assert_attributions( if expected_delta is None: assert_attribution_delta( # pyre-fixme[6]: For 1st argument expected `FbBaseTest` but got `Test`. - self, + self, # type: ignore inputs, attrs, n_samples, diff --git a/tests/attr/test_data_parallel.py b/tests/attr/test_data_parallel.py index bf89b9068a..2135e9e368 100644 --- a/tests/attr/test_data_parallel.py +++ b/tests/attr/test_data_parallel.py @@ -4,7 +4,7 @@ import copy import os from enum import Enum -from typing import Any, Callable, cast, Dict, Optional, Tuple, Type +from typing import Any, Callable, cast, Dict, List, Optional, Tuple, Type import torch import torch.distributed as dist @@ -136,91 +136,22 @@ def data_parallel_test_assert(self) -> None: else: cuda_args[key] = args[key] - alt_device_ids = None cuda_model = copy.deepcopy(model).cuda() - # Initialize models based on DataParallelCompareMode - if mode is DataParallelCompareMode.cpu_cuda: - model_1, model_2 = model, cuda_model - args_1, args_2 = args, cuda_args - elif mode is DataParallelCompareMode.data_parallel_default: - model_1, model_2 = ( - cuda_model, - torch.nn.parallel.DataParallel(cuda_model), - ) - args_1, args_2 = cuda_args, cuda_args - elif mode is DataParallelCompareMode.data_parallel_alt_dev_ids: - alt_device_ids = [0] + [ - x for x in range(torch.cuda.device_count() - 1, 0, -1) - ] - model_1, model_2 = ( - cuda_model, - torch.nn.parallel.DataParallel( - cuda_model, device_ids=alt_device_ids - ), - ) - args_1, args_2 = cuda_args, cuda_args - elif mode is DataParallelCompareMode.dist_data_parallel: - - model_1, model_2 = ( - cuda_model, - torch.nn.parallel.DistributedDataParallel( - cuda_model, device_ids=[0], output_device=0 - ), - ) - args_1, args_2 = cuda_args, cuda_args - else: - raise AssertionError("DataParallel compare mode type is not valid.") - - attr_method_1: Attribution - attr_method_2: Attribution - if target_layer: - internal_algorithm = cast(Type[InternalAttribution], algorithm) - attr_method_1 = internal_algorithm( - model_1, get_target_layer(model_1, target_layer) - ) - # cuda_model is used to obtain target_layer since DataParallel - # adds additional wrapper. - # model_2 is always either the CUDA model itself or DataParallel - if alt_device_ids is None: - attr_method_2 = internal_algorithm( - model_2, get_target_layer(cuda_model, target_layer) - ) - else: - # LayerDeepLift and LayerDeepLiftShap do not take device ids - # as a parameter, since they must always have the DataParallel - # model object directly. 
- # Some neuron methods and GuidedGradCAM also require the - # model and cannot take a forward function. - if issubclass( - internal_algorithm, - ( - LayerDeepLift, - LayerDeepLiftShap, - LayerLRP, - NeuronDeepLift, - NeuronDeepLiftShap, - NeuronDeconvolution, - NeuronGuidedBackprop, - GuidedGradCam, - ), - ): - attr_method_2 = internal_algorithm( - model_2, - get_target_layer(cuda_model, target_layer), # type: ignore - ) - else: - attr_method_2 = internal_algorithm( - model_2.forward, - get_target_layer(cuda_model, target_layer), - device_ids=alt_device_ids, - ) - else: - attr_method_1 = algorithm(model_1) - attr_method_2 = algorithm(model_2) + # Set up test arguments based on DataParallelCompareMode + model_1, model_2, args_1, args_2, alt_device_ids = _get_dp_test_args( + cuda_model, model, cuda_args, args, mode + ) - if noise_tunnel: - attr_method_1 = NoiseTunnel(attr_method_1) - attr_method_2 = NoiseTunnel(attr_method_2) + # Construct attribution methods + attr_method_1, attr_method_2 = _get_dp_attr_methods( + algorithm, + target_layer, + model_1, + model_2, + cuda_model, + alt_device_ids, + noise_tunnel, + ) if attr_method_1.has_convergence_delta(): attributions_1, delta_1 = attr_method_1.attribute( return_convergence_delta=True, **args_1 @@ -266,6 +197,107 @@ def data_parallel_test_assert(self) -> None: return data_parallel_test_assert +def _get_dp_test_args( + cuda_model: Module, + model: Module, + cuda_args: Dict[str, Any], + args: Dict[str, Any], + mode: DataParallelCompareMode, +) -> Tuple[Module, Module, Dict[str, Any], Dict[str, Any], Optional[List[int]]]: + # Initialize models based on DataParallelCompareMode + alt_device_ids = None + if mode is DataParallelCompareMode.cpu_cuda: + model_1, model_2 = model, cuda_model + args_1, args_2 = args, cuda_args + elif mode is DataParallelCompareMode.data_parallel_default: + model_1, model_2 = ( + cuda_model, + torch.nn.parallel.DataParallel(cuda_model), + ) + args_1, args_2 = cuda_args, cuda_args + elif mode is DataParallelCompareMode.data_parallel_alt_dev_ids: + alt_device_ids = [0] + list(range(torch.cuda.device_count() - 1, 0, -1)) + model_1, model_2 = ( + cuda_model, + torch.nn.parallel.DataParallel(cuda_model, device_ids=alt_device_ids), + ) + args_1, args_2 = cuda_args, cuda_args + elif mode is DataParallelCompareMode.dist_data_parallel: + + model_1, model_2 = ( + cuda_model, + torch.nn.parallel.DistributedDataParallel( + cuda_model, device_ids=[0], output_device=0 + ), + ) + args_1, args_2 = cuda_args, cuda_args + else: + raise AssertionError("DataParallel compare mode type is not valid.") + + return model_1, model_2, args_1, args_2, alt_device_ids + + +def _get_dp_attr_methods( + algorithm: Type[Attribution], + target_layer: Optional[str], + model_1: Module, + model_2: Module, + cuda_model: Module, + alt_device_ids: Optional[List[int]], + noise_tunnel: bool, +) -> Tuple[Attribution, Attribution]: + attr_method_1: Attribution + attr_method_2: Attribution + if target_layer: + internal_algorithm = cast(Type[InternalAttribution], algorithm) + attr_method_1 = internal_algorithm( + model_1, get_target_layer(model_1, target_layer) + ) + # cuda_model is used to obtain target_layer since DataParallel + # adds additional wrapper. 
+ # model_2 is always either the CUDA model itself or DataParallel + if alt_device_ids is None: + attr_method_2 = internal_algorithm( + model_2, get_target_layer(cuda_model, target_layer) + ) + else: + # LayerDeepLift and LayerDeepLiftShap do not take device ids + # as a parameter, since they must always have the DataParallel + # model object directly. + # Some neuron methods and GuidedGradCAM also require the + # model and cannot take a forward function. + if issubclass( + internal_algorithm, + ( + LayerDeepLift, + LayerDeepLiftShap, + LayerLRP, + NeuronDeepLift, + NeuronDeepLiftShap, + NeuronDeconvolution, + NeuronGuidedBackprop, + GuidedGradCam, + ), + ): + attr_method_2 = internal_algorithm( + model_2, + get_target_layer(cuda_model, target_layer), # type: ignore + ) + else: + attr_method_2 = internal_algorithm( + model_2.forward, + get_target_layer(cuda_model, target_layer), + device_ids=alt_device_ids, + ) + else: + attr_method_1 = algorithm(model_1) + attr_method_2 = algorithm(model_2) + if noise_tunnel: + attr_method_1 = NoiseTunnel(attr_method_1) + attr_method_2 = NoiseTunnel(attr_method_2) + return attr_method_1, attr_method_2 + + if torch.cuda.is_available() and torch.cuda.device_count() != 0: class DataParallelTest(BaseTest, metaclass=DataParallelMeta): diff --git a/tests/helpers/__init__.py b/tests/helpers/__init__.py index 5e49b13c35..745946af71 100644 --- a/tests/helpers/__init__.py +++ b/tests/helpers/__init__.py @@ -10,4 +10,7 @@ ] except ImportError: - from tests.helpers.basic import BaseTest + # tests/helpers/__init__.py:13: error: Incompatible import of "BaseTest" + # (imported name has type "type[BaseTest]", local name has type + # "type[FbBaseTest]") [assignment] + from tests.helpers.basic import BaseTest # type: ignore diff --git a/tests/helpers/influence/common.py b/tests/helpers/influence/common.py index 9e7f5b5792..bedba76930 100644 --- a/tests/helpers/influence/common.py +++ b/tests/helpers/influence/common.py @@ -409,6 +409,7 @@ def get_random_model_and_data( in_features, out_features, num_samples, use_gpu, unpack_inputs ) + net: Union[BasicLinearNet, MultLinearNet, Linear, UnpackLinear] if model_type == "random": net = ( BasicLinearNet(in_features, hidden_nodes, out_features) diff --git a/tests/influence/_core/test_tracin_regression.py b/tests/influence/_core/test_tracin_regression.py index c70ba8449b..9609091698 100644 --- a/tests/influence/_core/test_tracin_regression.py +++ b/tests/influence/_core/test_tracin_regression.py @@ -31,7 +31,7 @@ class TestTracInRegression(BaseTest): def _test_tracin_regression_setup( self, tmpdir: str, features: int, use_gpu: bool = False - ) -> Tuple[RangeDataset, Dict[str, Any]]: + ) -> Tuple[RangeDataset, Dict[str, Any]]: # fixme (return type) low = 1 high = 17 dataset = RangeDataset(low, high, features, use_gpu) @@ -49,7 +49,7 @@ def _test_tracin_regression_setup( torch.save(net_adjusted.state_dict(), os.path.join(tmpdir, checkpoint_name)) # pyre-fixme[61]: `net_adjusted` is undefined, or not always defined. 
- return dataset, net_adjusted + return dataset, net_adjusted # type: ignore use_gpu_list = ( [True, False] diff --git a/tests/influence/_core/test_tracin_xor.py b/tests/influence/_core/test_tracin_xor.py index 83968bb909..a9ed3a389d 100644 --- a/tests/influence/_core/test_tracin_xor.py +++ b/tests/influence/_core/test_tracin_xor.py @@ -167,7 +167,7 @@ def _test_tracin_xor_setup( dataset = BinaryDataset(use_gpu) - return net_adjusted, dataset + return net_adjusted, dataset # type: ignore parametrized_list: List[ Tuple[Optional[str], DataInfluenceConstructor, str, bool] diff --git a/tests/module/test_binary_concrete_stochastic_gates.py b/tests/module/test_binary_concrete_stochastic_gates.py index 57cbba8edc..f4ada7b9ef 100644 --- a/tests/module/test_binary_concrete_stochastic_gates.py +++ b/tests/module/test_binary_concrete_stochastic_gates.py @@ -18,6 +18,9 @@ ] ) class TestBinaryConcreteStochasticGates(BaseTest): + # pyre-fixme[13]: Attribute `testing_device` is never initialized. + testing_device: str + def setUp(self) -> None: super().setUp() # pyre-fixme[16]: `TestBinaryConcreteStochasticGates` has no attribute diff --git a/tests/module/test_gaussian_stochastic_gates.py b/tests/module/test_gaussian_stochastic_gates.py index e6cb9b9140..58b90d6673 100644 --- a/tests/module/test_gaussian_stochastic_gates.py +++ b/tests/module/test_gaussian_stochastic_gates.py @@ -19,6 +19,9 @@ ] ) class TestGaussianStochasticGates(BaseTest): + # pyre-fixme[13]: Attribute `testing_device` is never initialized. + testing_device: str + def setUp(self) -> None: super().setUp() # pyre-fixme[16]: `TestGaussianStochasticGates` has no attribute