
Commit 4d41785

Add softmax_focal_loss() to allow multi-class focal loss
In image segmentation tasks, focal loss is useful when trying to classify an image pixel as one of N classes. Unfortunately, sigmoid_focal_loss() isn't useful in such cases. I found that others have been asking for this as well in pytorch#3250, so I decided to submit a PR for it.
1 parent 8324c48 commit 4d41785
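
For context, a minimal sketch (illustrative only, not part of the commit) of the segmentation use case described above, assuming the usual (N, C, H, W) logits with per-pixel integer class targets, and assuming the softmax_focal_loss from the diff below is in scope:

import torch

# Hypothetical batch: 2 images, 5 classes, 4x4 pixels.
logits = torch.randn(2, 5, 4, 4)           # raw per-class scores per pixel
targets = torch.randint(0, 5, (2, 4, 4))   # one class index per pixel

# sigmoid_focal_loss() expects float targets with the same shape as the
# inputs (one binary label per logit), so it doesn't fit this multi-class
# setup. The softmax variant consumes class indices directly, like
# torch.nn.functional.cross_entropy:
loss = softmax_focal_loss(logits, targets, reduction="mean")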

File tree: 1 file changed, +78 −0 lines changed


torchvision/ops/focal_loss.py

Lines changed: 78 additions & 0 deletions
@@ -56,3 +56,81 @@ def sigmoid_focal_loss(
             f"Invalid Value for arg 'reduction': '{reduction} \n Supported reduction modes: 'none', 'mean', 'sum'"
         )
     return loss
+
+def softmax_focal_loss(
+    inputs: torch.Tensor,
+    targets: torch.Tensor,
+    alpha: float = 0.25,
+    gamma: float = 2,
+    reduction: str = "none",
+) -> torch.Tensor:
+    """
+    Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002.
+
+    Args:
+        inputs (Tensor): A float tensor of arbitrary shape.
+            The predictions for each example. Softmax() is applied on this tensor
+            to convert the raw logits to class probabilities. Expected shape is
+            (N, C, *).
+        targets (Tensor): Must be a long tensor similar to the one expected by
+            PyTorch's CrossEntropyLoss:
+            https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html
+            The class dimension is expected to be absent, and each
+            element is a class value in the range [0, C).
+        alpha (float): Weighting factor in range (0, 1) to balance
+            positive vs negative examples, or -1 to ignore. Default: ``0.25``.
+        gamma (float): Exponent of the modulating factor (1 - p_t) to
+            balance easy vs hard examples. Default: ``2``.
+        reduction (string): ``'none'`` | ``'mean'`` | ``'sum'`` | ``'instance-sum-batch-mean'``
+            ``'none'``: No reduction will be applied to the output.
+            ``'mean'``: The output will be averaged.
+            ``'sum'``: The output will be summed.
+            ``'instance-sum-batch-mean'``: The output will be summed for each
+                instance in the batch, and then averaged across the entire
+                batch. Default: ``'none'``.
+    Returns:
+        Loss tensor with the reduction option applied.
+    """
+    # Adapted from this version by Thomas V.:
+    # https://discuss.pytorch.org/t/focal-loss-for-imbalanced-multi-class-classification-in-pytorch/61289/2
+    # Referenced from this GitHub issue:
+    # https://github.com/pytorch/vision/issues/3250
+    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
+        _log_api_usage_once(softmax_focal_loss)
+
+    assert targets.dtype == torch.long, f"Expected a long tensor for 'targets', but got {targets.dtype}"
+
+    logits = inputs
+    ce_loss = nn.functional.cross_entropy(logits, targets, reduction="none")
+    # Instead of computing inputs.softmax(dim=1), we use the exponentiated
+    # negative of the cross entropy loss.
+    #
+    # Why does this work?
+    # Since this is a multi-class setting, only one class is active. The
+    # target probability of that class is 1, and the rest are all 0.
+    #
+    # Cross entropy loss computes:
+    #     pt = softmax(...)
+    #     loss = -1.0 * log(pt)
+    #
+    # Hence, exp(-loss) == pt.
+    #
+    # This trick works only if targets is a long tensor. If it were a float
+    # tensor, each value would be a probability, and we'd need to divide
+    # the result of cross entropy loss by that probability, and hence would
+    # need to compute the softmax manually anyway. We don't implement that
+    # here for brevity, but this code can be extended for such a use case.
+    pt = torch.exp(-ce_loss)
+    focal_loss = alpha * ((1 - pt) ** gamma) * ce_loss
+    if reduction == "none":
+        return focal_loss
+    elif reduction == "sum":
+        return focal_loss.sum()
+    elif reduction == "mean":
+        return focal_loss.mean()
+    elif reduction == "instance-sum-batch-mean":
+        return focal_loss.sum() / logits.size(0)
+    else:
+        raise ValueError(
+            f"Invalid value for arg 'reduction': '{reduction}'\n Supported reduction modes: 'none', 'mean', 'sum', 'instance-sum-batch-mean'"
+        )
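
As a quick sanity check on the exp(-ce_loss) == pt identity used in the comments above, the following sketch (mine, not part of the commit) compares the probabilities recovered from the per-sample cross entropy loss against an explicit softmax:

import torch
import torch.nn.functional as F

torch.manual_seed(0)
logits = torch.randn(3, 4)          # 3 samples, 4 classes
targets = torch.tensor([0, 2, 1])   # one target class index per sample

# Per-sample cross entropy: -log(softmax(logits)[i, targets[i]])
ce_loss = F.cross_entropy(logits, targets, reduction="none")

# Exponentiating the negated loss recovers the predicted probability of
# the target class without materializing the full softmax tensor.
pt_from_loss = torch.exp(-ce_loss)
pt_explicit = logits.softmax(dim=1).gather(1, targets.unsqueeze(1)).squeeze(1)

assert torch.allclose(pt_from_loss, pt_explicit)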
