Fix mypy type annotations #1696

Merged · 6 commits · May 7, 2020
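
The fix throughout is the same: torchvision's detection code uses comment-style (PEP 484) type annotations, presumably so TorchScript can parse them, but many comments stopped at the argument list, which mypy rejects as incomplete. Each annotated signature therefore gains an explicit `-> ReturnType`. A minimal sketch of the pattern, with illustrative names rather than ones from the diff:

```python
from typing import List, Tuple

import torch
from torch import Tensor


class Sampler(object):
    def __init__(self, batch_size, fraction):
        # type: (int, float) -> None
        # A type comment without "-> None" is flagged by mypy as a syntax
        # error, so even __init__ gets an explicit return type.
        self.batch_size = batch_size
        self.fraction = fraction

    def __call__(self, matched_idxs):
        # type: (List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
        pos = [(m >= 1).to(torch.uint8) for m in matched_idxs]
        neg = [(m == 0).to(torch.uint8) for m in matched_idxs]
        return pos, neg
```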
12 changes: 6 additions & 6 deletions torchvision/models/detection/_utils.py
@@ -20,7 +20,7 @@ class BalancedPositiveNegativeSampler(object):
"""

def __init__(self, batch_size_per_image, positive_fraction):
# type: (int, float)
# type: (int, float) -> None
"""
Arguments:
batch_size_per_image (int): number of elements to be selected per image
@@ -30,7 +30,7 @@ def __init__(self, batch_size_per_image, positive_fraction):
self.positive_fraction = positive_fraction

def __call__(self, matched_idxs):
# type: (List[Tensor])
# type: (List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
"""
Arguments:
matched idxs: list of tensors containing -1, 0 or positive values.
@@ -139,7 +139,7 @@ class BoxCoder(object):
"""

def __init__(self, weights, bbox_xform_clip=math.log(1000. / 16)):
# type: (Tuple[float, float, float, float], float)
# type: (Tuple[float, float, float, float], float) -> None
"""
Arguments:
weights (4-element tuple)
@@ -149,7 +149,7 @@ def __init__(self, weights, bbox_xform_clip=math.log(1000. / 16)):
self.bbox_xform_clip = bbox_xform_clip

def encode(self, reference_boxes, proposals):
# type: (List[Tensor], List[Tensor])
# type: (List[Tensor], List[Tensor]) -> List[Tensor]
boxes_per_image = [len(b) for b in reference_boxes]
reference_boxes = torch.cat(reference_boxes, dim=0)
proposals = torch.cat(proposals, dim=0)
@@ -173,7 +173,7 @@ def encode_single(self, reference_boxes, proposals):
return targets

def decode(self, rel_codes, boxes):
# type: (Tensor, List[Tensor])
# type: (Tensor, List[Tensor]) -> Tensor
assert isinstance(boxes, (list, tuple))
assert isinstance(rel_codes, torch.Tensor)
boxes_per_image = [b.size(0) for b in boxes]
@@ -251,7 +251,7 @@ class Matcher(object):
}

def __init__(self, high_threshold, low_threshold, allow_low_quality_matches=False):
# type: (float, float, bool)
# type: (float, float, bool) -> None
"""
Args:
high_threshold (float): quality values greater than or equal to
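For context, a hedged usage sketch of the sampler with the completed annotation — the numbers are made up, and the reading of the return value (per-image binary masks over the sampled positives and negatives) is inferred from the class rather than stated in this diff:

```python
import torch
from torchvision.models.detection._utils import BalancedPositiveNegativeSampler

# 8 proposals per image, aiming for 25% positives (illustrative numbers).
sampler = BalancedPositiveNegativeSampler(batch_size_per_image=8, positive_fraction=0.25)

# matched_idxs convention: -1 = ignore, 0 = background, >= 1 = matched ground truth.
matched_idxs = [torch.tensor([2, 0, 0, -1, 1, 0, 0, 0])]

pos_masks, neg_masks = sampler(matched_idxs)  # List[Tensor], List[Tensor]
print(pos_masks[0].nonzero(), neg_masks[0].nonzero())
```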
2 changes: 1 addition & 1 deletion torchvision/models/detection/generalized_rcnn.py
@@ -42,7 +42,7 @@ def eager_outputs(self, losses, detections):
return detections

def forward(self, images, targets=None):
# type: (List[Tensor], Optional[List[Dict[str, Tensor]]])
# type: (List[Tensor], Optional[List[Dict[str, Tensor]]]) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]
"""
Arguments:
images (list[Tensor]): images to be processed
4 changes: 2 additions & 2 deletions torchvision/models/detection/image_list.py
@@ -14,7 +14,7 @@ class ImageList(object):
"""

def __init__(self, tensors, image_sizes):
# type: (Tensor, List[Tuple[int, int]])
# type: (Tensor, List[Tuple[int, int]]) -> None
"""
Arguments:
tensors (tensor)
@@ -24,6 +24,6 @@ def __init__(self, tensors, image_sizes):
self.image_sizes = image_sizes

def to(self, device):
# type: (Device) # noqa
# type: (Device) -> ImageList # noqa
cast_tensor = self.tensors.to(device)
return ImageList(cast_tensor, self.image_sizes)
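
`ImageList.to` now declares that it returns an `ImageList`; `Device` appears to be an alias for `torch.device` that flake8 cannot resolve in that file, hence the `# noqa`. A sketch of the same pattern under that assumption, using a stand-in class name:

```python
from typing import List, Tuple

import torch
from torch import Tensor

Device = torch.device  # assumption: torchvision aliases torch.device similarly


class ImageBatch(object):
    # Illustrative stand-in for torchvision's ImageList.
    def __init__(self, tensors, image_sizes):
        # type: (Tensor, List[Tuple[int, int]]) -> None
        self.tensors = tensors
        self.image_sizes = image_sizes

    def to(self, device):
        # type: (Device) -> ImageBatch
        # Returning a new instance keeps the method side-effect free and
        # gives the type comment a concrete class to name.
        return ImageBatch(self.tensors.to(device), self.image_sizes)
```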
59 changes: 36 additions & 23 deletions torchvision/models/detection/roi_heads.py
@@ -15,7 +15,7 @@


def fastrcnn_loss(class_logits, box_regression, labels, regression_targets):
# type: (Tensor, Tensor, List[Tensor], List[Tensor])
# type: (Tensor, Tensor, List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]
"""
Computes the loss for Faster R-CNN.

@@ -55,7 +55,7 @@ def fastrcnn_loss(class_logits, box_regression, labels, regression_targets):


def maskrcnn_inference(x, labels):
# type: (Tensor, List[Tensor])
# type: (Tensor, List[Tensor]) -> List[Tensor]
"""
From the results of the CNN, post process the masks
by taking the mask corresponding to the class with max
@@ -91,7 +91,7 @@ def maskrcnn_inference(x, labels):


def project_masks_on_boxes(gt_masks, boxes, matched_idxs, M):
# type: (Tensor, Tensor, Tensor, int)
# type: (Tensor, Tensor, Tensor, int) -> Tensor
"""
Given segmentation masks and the bounding boxes corresponding
to the location of the masks in the image, this function
@@ -106,7 +106,7 @@ def project_masks_on_boxes(gt_masks, boxes, matched_idxs, M):


def maskrcnn_loss(mask_logits, proposals, gt_masks, gt_labels, mask_matched_idxs):
# type: (Tensor, List[Tensor], List[Tensor], List[Tensor], List[Tensor])
# type: (Tensor, List[Tensor], List[Tensor], List[Tensor], List[Tensor]) -> Tensor
"""
Arguments:
proposals (list[BoxList])
@@ -139,7 +139,7 @@ def maskrcnn_loss(mask_logits, proposals, gt_masks, gt_labels, mask_matched_idxs):


def keypoints_to_heatmap(keypoints, rois, heatmap_size):
# type: (Tensor, Tensor, int)
# type: (Tensor, Tensor, int) -> Tuple[Tensor, Tensor]
offset_x = rois[:, 0]
offset_y = rois[:, 1]
scale_x = heatmap_size / (rois[:, 2] - rois[:, 0])
@@ -283,7 +283,7 @@ def heatmaps_to_keypoints(maps, rois):


def keypointrcnn_loss(keypoint_logits, proposals, gt_keypoints, keypoint_matched_idxs):
# type: (Tensor, List[Tensor], List[Tensor], List[Tensor])
# type: (Tensor, List[Tensor], List[Tensor], List[Tensor]) -> Tensor
N, K, H, W = keypoint_logits.shape
assert H == W
discretization_size = H
@@ -313,7 +313,7 @@ def keypointrcnn_loss(keypoint_logits, proposals, gt_keypoints, keypoint_matched_idxs):


def keypointrcnn_inference(x, boxes):
# type: (Tensor, List[Tensor])
# type: (Tensor, List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
kp_probs = []
kp_scores = []

@@ -335,7 +335,7 @@ def keypointrcnn_inference(x, boxes):


def _onnx_expand_boxes(boxes, scale):
# type: (Tensor, float)
# type: (Tensor, float) -> Tensor
w_half = (boxes[:, 2] - boxes[:, 0]) * .5
h_half = (boxes[:, 3] - boxes[:, 1]) * .5
x_c = (boxes[:, 2] + boxes[:, 0]) * .5
@@ -356,7 +356,7 @@ def _onnx_expand_boxes(boxes, scale):
# but are kept here for the moment while we need them
# temporarily for paste_mask_in_image
def expand_boxes(boxes, scale):
# type: (Tensor, float)
# type: (Tensor, float) -> Tensor
if torchvision._is_tracing():
return _onnx_expand_boxes(boxes, scale)
w_half = (boxes[:, 2] - boxes[:, 0]) * .5
@@ -382,7 +382,7 @@ def expand_masks_tracing_scale(M, padding):


def expand_masks(mask, padding):
# type: (Tensor, int)
# type: (Tensor, int) -> Tuple[Tensor, float]
M = mask.shape[-1]
if torch._C._get_tracing_state(): # could not import is_tracing(), not sure why
scale = expand_masks_tracing_scale(M, padding)
@@ -393,7 +393,7 @@ def expand_masks(mask, padding):


def paste_mask_in_image(mask, box, im_h, im_w):
# type: (Tensor, Tensor, int, int)
# type: (Tensor, Tensor, int, int) -> Tensor
TO_REMOVE = 1
w = int(box[2] - box[0] + TO_REMOVE)
h = int(box[3] - box[1] + TO_REMOVE)
@@ -471,7 +471,7 @@ def _onnx_paste_masks_in_image_loop(masks, boxes, im_h, im_w):


def paste_masks_in_image(masks, boxes, img_shape, padding=1):
# type: (Tensor, Tensor, Tuple[int, int], int)
# type: (Tensor, Tensor, Tuple[int, int], int) -> Tensor
masks, scale = expand_masks(masks, padding=padding)
boxes = expand_boxes(boxes, scale).to(dtype=torch.int64)
im_h, im_w = img_shape
@@ -570,7 +570,7 @@ def has_keypoint(self):
return True

def assign_targets_to_proposals(self, proposals, gt_boxes, gt_labels):
# type: (List[Tensor], List[Tensor], List[Tensor])
# type: (List[Tensor], List[Tensor], List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
matched_idxs = []
labels = []
for proposals_in_image, gt_boxes_in_image, gt_labels_in_image in zip(proposals, gt_boxes, gt_labels):
@@ -607,7 +607,7 @@ def assign_targets_to_proposals(self, proposals, gt_boxes, gt_labels):
return matched_idxs, labels

def subsample(self, labels):
# type: (List[Tensor])
# type: (List[Tensor]) -> List[Tensor]
sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
sampled_inds = []
for img_idx, (pos_inds_img, neg_inds_img) in enumerate(
@@ -618,7 +618,7 @@ def subsample(self, labels):
return sampled_inds

def add_gt_proposals(self, proposals, gt_boxes):
# type: (List[Tensor], List[Tensor])
# type: (List[Tensor], List[Tensor]) -> List[Tensor]
proposals = [
torch.cat((proposal, gt_box))
for proposal, gt_box in zip(proposals, gt_boxes)
@@ -627,22 +627,25 @@ def add_gt_proposals(self, proposals, gt_boxes):
return proposals

def DELTEME_all(self, the_list):
# type: (List[bool])
# type: (List[bool]) -> bool
for i in the_list:
if not i:
return False
return True

def check_targets(self, targets):
# type: (Optional[List[Dict[str, Tensor]]])
# type: (Optional[List[Dict[str, Tensor]]]) -> None
assert targets is not None
assert self.DELTEME_all(["boxes" in t for t in targets])
assert self.DELTEME_all(["labels" in t for t in targets])
if self.has_mask():
assert self.DELTEME_all(["masks" in t for t in targets])

def select_training_samples(self, proposals, targets):
# type: (List[Tensor], Optional[List[Dict[str, Tensor]]])
def select_training_samples(self,
proposals, # type: List[Tensor]
targets # type: Optional[List[Dict[str, Tensor]]]
):
# type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor], List[Tensor]]
self.check_targets(targets)
assert targets is not None
dtype = proposals[0].dtype
@@ -674,8 +677,13 @@ def select_training_samples(self, proposals, targets):
regression_targets = self.box_coder.encode(matched_gt_boxes, proposals)
return proposals, matched_idxs, labels, regression_targets

def postprocess_detections(self, class_logits, box_regression, proposals, image_shapes):
# type: (Tensor, Tensor, List[Tensor], List[Tuple[int, int]])
def postprocess_detections(self,
class_logits, # type: Tensor
box_regression, # type: Tensor
proposals, # type: List[Tensor]
image_shapes # type: List[Tuple[int, int]]
):
# type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]
device = class_logits.device
num_classes = class_logits.shape[-1]

@@ -727,8 +735,13 @@ def postprocess_detections(self, class_logits, box_regression, proposals, image_shapes):

return all_boxes, all_scores, all_labels

def forward(self, features, proposals, image_shapes, targets=None):
# type: (Dict[str, Tensor], List[Tensor], List[Tuple[int, int]], Optional[List[Dict[str, Tensor]]])
def forward(self,
features, # type: Dict[str, Tensor]
proposals, # type: List[Tensor]
image_shapes, # type: List[Tuple[int, int]]
targets=None # type: Optional[List[Dict[str, Tensor]]]
):
# type: (...) -> Tuple[List[Dict[str, Tensor]], Dict[str, Tensor]]
"""
Arguments:
features (List[Tensor])
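Where a signature grew too long for a single comment, the diff switches to per-argument type comments plus a `# type: (...) -> ...` line for the return value, a form both mypy and TorchScript accept. A minimal sketch with made-up names:

```python
from typing import Dict, List, Optional, Tuple

from torch import Tensor


def detect(features,      # type: Dict[str, Tensor]
           proposals,     # type: List[Tensor]
           image_shapes,  # type: List[Tuple[int, int]]
           targets=None   # type: Optional[List[Dict[str, Tensor]]]
           ):
    # type: (...) -> Tuple[List[Dict[str, Tensor]], Dict[str, Tensor]]
    # Per-argument comments describe each parameter; the "(...)" comment
    # carries only the return type, which is what mypy was missing.
    detections = [{"boxes": p} for p in proposals]  # type: List[Dict[str, Tensor]]
    losses = {}  # type: Dict[str, Tensor]
    return detections, losses
```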
32 changes: 18 additions & 14 deletions torchvision/models/detection/rpn.py
@@ -77,7 +77,7 @@ def __init__(
# (scales, aspect_ratios) are usually an element of zip(self.scales, self.aspect_ratios)
# This method assumes aspect ratio = height / width for an anchor.
def generate_anchors(self, scales, aspect_ratios, dtype=torch.float32, device="cpu"):
# type: (List[int], List[float], int, Device) # noqa: F821
# type: (List[int], List[float], int, Device) -> Tensor # noqa: F821
scales = torch.as_tensor(scales, dtype=dtype, device=device)
aspect_ratios = torch.as_tensor(aspect_ratios, dtype=dtype, device=device)
h_ratios = torch.sqrt(aspect_ratios)
@@ -90,7 +90,7 @@ def generate_anchors(self, scales, aspect_ratios, dtype=torch.float32, device="cpu"):
return base_anchors.round()

def set_cell_anchors(self, dtype, device):
# type: (int, Device) -> None # noqa: F821
# type: (int, Device) -> None # noqa: F821
if self.cell_anchors is not None:
cell_anchors = self.cell_anchors
assert cell_anchors is not None
@@ -116,7 +116,7 @@ def num_anchors_per_location(self):
# For every combination of (a, (g, s), i) in (self.cell_anchors, zip(grid_sizes, strides), 0:2),
# output g[i] anchors that are s[i] distance apart in direction i, with the same dimensions as a.
def grid_anchors(self, grid_sizes, strides):
# type: (List[List[int]], List[List[Tensor]])
# type: (List[List[int]], List[List[Tensor]]) -> List[Tensor]
anchors = []
cell_anchors = self.cell_anchors
assert cell_anchors is not None
@@ -149,7 +149,7 @@ def grid_anchors(self, grid_sizes, strides):
return anchors

def cached_grid_anchors(self, grid_sizes, strides):
# type: (List[List[int]], List[List[Tensor]])
# type: (List[List[int]], List[List[Tensor]]) -> List[Tensor]
key = str(grid_sizes) + str(strides)
if key in self._cache:
return self._cache[key]
@@ -158,7 +158,7 @@ def cached_grid_anchors(self, grid_sizes, strides):
return anchors

def forward(self, image_list, feature_maps):
# type: (ImageList, List[Tensor])
# type: (ImageList, List[Tensor]) -> List[Tensor]
grid_sizes = list([feature_map.shape[-2:] for feature_map in feature_maps])
image_size = image_list.tensors.shape[-2:]
dtype, device = feature_maps[0].dtype, feature_maps[0].device
@@ -202,7 +202,7 @@ def __init__(self, in_channels, num_anchors):
torch.nn.init.constant_(l.bias, 0)

def forward(self, x):
# type: (List[Tensor])
# type: (List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
logits = []
bbox_reg = []
for feature in x:
@@ -213,15 +213,15 @@ def forward(self, x):


def permute_and_flatten(layer, N, A, C, H, W):
# type: (Tensor, int, int, int, int, int)
# type: (Tensor, int, int, int, int, int) -> Tensor
layer = layer.view(N, -1, C, H, W)
layer = layer.permute(0, 3, 4, 1, 2)
layer = layer.reshape(N, -1, C)
return layer


def concat_box_prediction_layers(box_cls, box_regression):
# type: (List[Tensor], List[Tensor])
# type: (List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]
box_cls_flattened = []
box_regression_flattened = []
# for each feature level, permute the outputs to make them be in the
@@ -327,7 +327,7 @@ def post_nms_top_n(self):
return self._post_nms_top_n['testing']

def assign_targets_to_anchors(self, anchors, targets):
# type: (List[Tensor], List[Dict[str, Tensor]])
# type: (List[Tensor], List[Dict[str, Tensor]]) -> Tuple[List[Tensor], List[Tensor]]
labels = []
matched_gt_boxes = []
for anchors_per_image, targets_per_image in zip(anchors, targets):
@@ -363,7 +363,7 @@ def assign_targets_to_anchors(self, anchors, targets):
return labels, matched_gt_boxes

def _get_top_n_idx(self, objectness, num_anchors_per_level):
# type: (Tensor, List[int])
# type: (Tensor, List[int]) -> Tensor
r = []
offset = 0
for ob in objectness.split(num_anchors_per_level, 1):
@@ -378,7 +378,7 @@ def _get_top_n_idx(self, objectness, num_anchors_per_level):
return torch.cat(r, dim=1)

def filter_proposals(self, proposals, objectness, image_shapes, num_anchors_per_level):
# type: (Tensor, Tensor, List[Tuple[int, int]], List[int])
# type: (Tensor, Tensor, List[Tuple[int, int]], List[int]) -> Tuple[List[Tensor], List[Tensor]]
num_images = proposals.shape[0]
device = proposals.device
# do not backprop throught objectness
@@ -418,7 +418,7 @@ def filter_proposals(self, proposals, objectness, image_shapes, num_anchors_per_level):
return final_boxes, final_scores

def compute_loss(self, objectness, pred_bbox_deltas, labels, regression_targets):
# type: (Tensor, Tensor, List[Tensor], List[Tensor])
# type: (Tensor, Tensor, List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]
"""
Arguments:
objectness (Tensor)
@@ -455,8 +455,12 @@ def compute_loss(self, objectness, pred_bbox_deltas, labels, regression_targets):

return objectness_loss, box_loss

def forward(self, images, features, targets=None):
# type: (ImageList, Dict[str, Tensor], Optional[List[Dict[str, Tensor]]])
def forward(self,
images, # type: ImageList
features, # type: Dict[str, Tensor]
targets=None # type: Optional[List[Dict[str, Tensor]]]
):
# type: (...) -> Tuple[List[Tensor], Dict[str, Tensor]]
"""
Arguments:
images (ImageList): images for which we want to compute the predictions
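A quick, hypothetical smoke test for these annotations: running mypy over `torchvision/models/detection` covers the static-checking side, and scripting a detection model exercises the same type comments through TorchScript. The snippet below is illustrative, not taken from this PR:

```python
import torch
import torchvision

# Building without pretrained weights avoids a download; scripting will fail
# loudly if TorchScript cannot parse one of the type comments touched here.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False)
scripted = torch.jit.script(model)
print(type(scripted))  # a ScriptModule subclass on success
```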