Add antialias to FID (#3177)

SkafteNicki · Borda · web-flow · commit 533fe9572622 · 2025-07-14T12:32:23.000+02:00
* add to src
* add tests
* try seed

---------

Co-authored-by: Jirka Borovec &lt;6035284+Borda@users.noreply.github.com&gt;
Co-authored-by: Jirka B &lt;j.borovec+github@gmail.com&gt;
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -32,6 +32,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Added `Lip Vertex Error (LVE)` in multimodal domain ([3090](https://github.com/Lightning-AI/torchmetrics/pull/3090))
 
 
+- Added `antialias` argument to `FID` metric ([3177](https://github.com/Lightning-AI/torchmetrics/pull/3177))
+
+
 - Added `mixed` input format to segmentation metrics ([3176](https://github.com/Lightning-AI/torchmetrics/pull/3176))
 
 ### Changed
diff --git a/src/torchmetrics/functional/video/vmaf.py b/src/torchmetrics/functional/video/vmaf.py
@@ -92,7 +92,6 @@ def video_multi_method_assessment_fusion(
     Example:
         >>> import torch
         >>> from torchmetrics.functional.video import video_multi_method_assessment_fusion
-        >>> _, _ = torch.manual_seed(42), torch.cuda.manual_seed_all(42)
         >>> # 2 videos, 3 channels, 10 frames, 32x32 resolution
         >>> preds = torch.rand(2, 3, 10, 32, 32, generator=torch.manual_seed(42))
         >>> target = torch.rand(2, 3, 10, 32, 32, generator=torch.manual_seed(43))
diff --git a/src/torchmetrics/image/fid.py b/src/torchmetrics/image/fid.py
@@ -45,11 +45,14 @@ class _FeatureExtractorInceptionV3(Module):  # type: ignore[no-redef]
 class NoTrainInceptionV3(_FeatureExtractorInceptionV3):
     """Module that never leaves evaluation mode."""
 
+    INPUT_IMAGE_SIZE: int
+
     def __init__(
         self,
         name: str,
         features_list: list[str],
         feature_extractor_weights_path: Optional[str] = None,
+        antialias: bool = True,
     ) -> None:
         if not _TORCH_FIDELITY_AVAILABLE:
             raise ModuleNotFoundError(
@@ -58,6 +61,7 @@ def __init__(
             )
 
         super().__init__(name, features_list, feature_extractor_weights_path)
+        self.use_antialias = antialias
         # put into evaluation mode
         self.eval()
 
@@ -81,11 +85,21 @@ def _torch_fidelity_forward(self, x: Tensor) -> tuple[Tensor, ...]:
         remaining_features = self.features_list.copy()
 
         x = x.to(self._dtype) if hasattr(self, "_dtype") else x.to(torch.float)
-        x = interpolate_bilinear_2d_like_tensorflow1x(
-            x,
-            size=(self.INPUT_IMAGE_SIZE, self.INPUT_IMAGE_SIZE),
-            align_corners=False,
-        )
+        if self.use_antialias:
+            x = torch.nn.functional.interpolate(
+                x,
+                size=(self.INPUT_IMAGE_SIZE, self.INPUT_IMAGE_SIZE),
+                mode="bilinear",
+                align_corners=False,
+                antialias=True,
+            )
+        else:
+            x = interpolate_bilinear_2d_like_tensorflow1x(
+                x,
+                size=(self.INPUT_IMAGE_SIZE, self.INPUT_IMAGE_SIZE),
+                align_corners=False,
+            )
+
         x = (x - 128) / 128
 
         x = self.Conv2d_1a_3x3(x)
@@ -250,6 +264,9 @@ class FrechetInceptionDistance(Metric):
               - True: if input imgs are expected to be in the data type of torch.float32.
               - False: if input imgs are expected to be in the data type of torch.int8.
         input_img_size: tuple of integers. Indicates input img size to the custom feature extractor network if provided.
+        use_antialias: boolian flag to indicate whether to use antialiasing when resizing images. This will change the
+            resize function to use bilinear interpolation with antialiasing, which is different from the original
+            Inception v3 implementation. Does not apply to custom feature extractor networks.
         kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info.
 
     Raises:
@@ -301,6 +318,7 @@ def __init__(
         normalize: bool = False,
         input_img_size: tuple[int, int, int] = (3, 299, 299),
         feature_extractor_weights_path: Optional[str] = None,
+        antialias: bool = True,
         **kwargs: Any,
     ) -> None:
         super().__init__(**kwargs)
@@ -309,6 +327,7 @@ def __init__(
             raise ValueError("Argument `normalize` expected to be a bool")
         self.normalize = normalize
         self.used_custom_model = False
+        antialias = antialias
 
         if isinstance(feature, int):
             num_features = feature
@@ -327,6 +346,7 @@ def __init__(
                 name="inception-v3-compat",
                 features_list=[str(feature)],
                 feature_extractor_weights_path=feature_extractor_weights_path,
+                antialias=antialias,
             )
 
         elif isinstance(feature, Module):
diff --git a/src/torchmetrics/video/vmaf.py b/src/torchmetrics/video/vmaf.py
@@ -88,7 +88,6 @@ class VideoMultiMethodAssessmentFusion(Metric):
     Example:
         >>> import torch
         >>> from torchmetrics.video import VideoMultiMethodAssessmentFusion
-        >>> _, _ = torch.manual_seed(42), torch.cuda.manual_seed_all(42)
         >>> # 2 videos, 3 channels, 10 frames, 32x32 resolution
         >>> preds = torch.rand(2, 3, 10, 32, 32, generator=torch.manual_seed(42))
         >>> target = torch.rand(2, 3, 10, 32, 32, generator=torch.manual_seed(43))
diff --git a/src/torchmetrics/wrappers/feature_share.py b/src/torchmetrics/wrappers/feature_share.py
@@ -74,11 +74,13 @@ class FeatureShare(MetricCollection):
         >>> # initialize the metrics
         >>> fs = FeatureShare([FrechetInceptionDistance(), KernelInceptionDistance(subset_size=10, subsets=2)])
         >>> # update metric
-        >>> fs.update(torch.randint(255, (50, 3, 64, 64), dtype=torch.uint8), real=True)
-        >>> fs.update(torch.randint(255, (50, 3, 64, 64), dtype=torch.uint8), real=False)
+        >>> input_tensor = torch.randint(255, (50, 3, 64, 64), dtype=torch.uint8, generator=torch.manual_seed(42))
+        >>> fs.update(input_tensor, real=True)
+        >>> input_tensor = torch.randint(255, (50, 3, 64, 64), dtype=torch.uint8, generator=torch.manual_seed(43))
+        >>> fs.update(input_tensor, real=False)
         >>> # compute metric
         >>> fs.compute()
-        {'FrechetInceptionDistance': tensor(15.1700), 'KernelInceptionDistance': (tensor(-0.0012), tensor(0.0014))}
+        {'FrechetInceptionDistance': tensor(13.5367), 'KernelInceptionDistance': (tensor(0.0003), tensor(0.0003))}
 
     """
 
diff --git a/tests/unittests/image/test_fid.py b/tests/unittests/image/test_fid.py
@@ -231,3 +231,22 @@ def test_dtype_transfer_to_submodule():
 
     out = metric.inception(imgs)
     assert out.dtype == torch.float64
+
+
+def test_antialias():
+    """Test that on random input the antialiasing should produce similar results."""
+    imgs = torch.randint(0, 255, (10, 3, 299, 299), dtype=torch.uint8)
+
+    metric_no_aa = FrechetInceptionDistance(feature=64, antialias=False)
+    metric_aa = FrechetInceptionDistance(feature=64, antialias=True)
+
+    metric_no_aa.update(imgs, real=True)
+    metric_no_aa.update(imgs, real=False)
+
+    metric_aa.update(imgs, real=True)
+    metric_aa.update(imgs, real=False)
+
+    val_no_aa = metric_no_aa.compute()
+    val_aa = metric_aa.compute()
+
+    assert torch.allclose(val_no_aa, val_aa, atol=1e-3)