From 606363b83bdc59568920d4228ebbbf4d9247aa19 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Wed, 9 Nov 2022 16:30:44 +0100
Subject: [PATCH 1/6] fix prototype transforms tests with set agg_method

---
 test/prototype_common_utils.py                |  69 ++++--------
 test/prototype_transforms_kernel_infos.py     | 106 ++++++++++++------
 test/test_prototype_transforms_consistency.py |  12 +-
 test/test_prototype_transforms_functional.py  |   1 -
 4 files changed, 100 insertions(+), 88 deletions(-)

diff --git a/test/prototype_common_utils.py b/test/prototype_common_utils.py
index 85583bcca8c..86ed5c22997 100644
--- a/test/prototype_common_utils.py
+++ b/test/prototype_common_utils.py
@@ -12,17 +12,9 @@
 import torch.testing
 from datasets_utils import combinations_grid
 from torch.nn.functional import one_hot
-from torch.testing._comparison import (
-    assert_equal as _assert_equal,
-    BooleanPair,
-    ErrorMeta,
-    NonePair,
-    NumberPair,
-    TensorLikePair,
-    UnsupportedInputs,
-)
+from torch.testing._comparison import assert_equal as _assert_equal, BooleanPair, NonePair, NumberPair, TensorLikePair
 from torchvision.prototype import features
-from torchvision.prototype.transforms.functional import convert_dtype_image_tensor, to_image_tensor
+from torchvision.prototype.transforms.functional import to_image_tensor
 from torchvision.transforms.functional_tensor import _max_value as get_max_value
 
 __all__ = [
@@ -54,7 +46,7 @@
 ]
 
 
-class PILImagePair(TensorLikePair):
+class ImagePair(TensorLikePair):
     def __init__(
         self,
         actual,
@@ -64,44 +56,13 @@ def __init__(
         allowed_percentage_diff=None,
         **other_parameters,
     ):
-        if not any(isinstance(input, PIL.Image.Image) for input in (actual, expected)):
-            raise UnsupportedInputs()
-
-        # This parameter is ignored to enable checking PIL images to tensor images no on the CPU
-        other_parameters["check_device"] = False
+        if all(isinstance(input, PIL.Image.Image) for input in [actual, expected]):
+            actual, expected = [to_image_tensor(input) for input in [actual, expected]]
 
         super().__init__(actual, expected, **other_parameters)
         self.agg_method = getattr(torch, agg_method) if isinstance(agg_method, str) else agg_method
         self.allowed_percentage_diff = allowed_percentage_diff
 
-    def _process_inputs(self, actual, expected, *, id, allow_subclasses):
-        actual, expected = [
-            to_image_tensor(input) if not isinstance(input, torch.Tensor) else features.Image(input)
-            for input in [actual, expected]
-        ]
-        # This broadcast is needed, because `features.Mask`'s can have a 2D shape, but converting the equivalent PIL
-        # image to a tensor adds a singleton leading dimension.
-        # Although it looks like this belongs in `self._equalize_attributes`, it has to happen here.
-        # `self._equalize_attributes` is called after `super()._compare_attributes` and that has an unconditional
-        # shape check that will fail if we don't broadcast before.
-        try:
-            actual, expected = torch.broadcast_tensors(actual, expected)
-        except RuntimeError:
-            raise ErrorMeta(
-                AssertionError,
-                f"The image shapes are not broadcastable: {actual.shape} != {expected.shape}.",
-                id=id,
-            ) from None
-        return super()._process_inputs(actual, expected, id=id, allow_subclasses=allow_subclasses)
-
-    def _equalize_attributes(self, actual, expected):
-        if actual.dtype != expected.dtype:
-            dtype = torch.promote_types(actual.dtype, expected.dtype)
-            actual = convert_dtype_image_tensor(actual, dtype)
-            expected = convert_dtype_image_tensor(expected, dtype)
-
-        return super()._equalize_attributes(actual, expected)
-
     def compare(self) -> None:
         actual, expected = self.actual, self.expected
 
@@ -111,16 +72,24 @@ def compare(self) -> None:
         abs_diff = torch.abs(actual - expected)
 
         if self.allowed_percentage_diff is not None:
-            percentage_diff = (abs_diff != 0).to(torch.float).mean()
+            percentage_diff = float((abs_diff.ne(0).to(torch.float64).mean()))
             if percentage_diff > self.allowed_percentage_diff:
-                self._make_error_meta(AssertionError, "percentage mismatch")
+                raise self._make_error_meta(
+                    AssertionError,
+                    f"{percentage_diff:.1%} elements differ, "
+                    f"but only {self.allowed_percentage_diff:.1%} is allowed",
+                )
 
         if self.agg_method is None:
             super()._compare_values(actual, expected)
         else:
-            err = self.agg_method(abs_diff.to(torch.float64))
-            if err > self.atol:
-                self._make_error_meta(AssertionError, "aggregated mismatch")
+            agg_abs_diff = float(self.agg_method(abs_diff.to(torch.float64)))
+            if agg_abs_diff > self.atol:
+                raise self._make_error_meta(
+                    AssertionError,
+                    f"The '{self.agg_method.__name__}' of the absolute difference is {agg_abs_diff}, "
+                    f"but only {self.atol} is allowed.",
+                )
 
 
 def assert_close(
@@ -148,7 +117,7 @@ def assert_close(
             NonePair,
             BooleanPair,
             NumberPair,
-            PILImagePair,
+            ImagePair,
             TensorLikePair,
         ),
         allow_subclasses=allow_subclasses,
diff --git a/test/prototype_transforms_kernel_infos.py b/test/prototype_transforms_kernel_infos.py
index 52ff4565a55..308e7cfb0de 100644
--- a/test/prototype_transforms_kernel_infos.py
+++ b/test/prototype_transforms_kernel_infos.py
@@ -4,6 +4,7 @@
 import math
 
 import numpy as np
+import PIL.Image
 import pytest
 import torch.testing
 import torchvision.ops
@@ -62,8 +63,8 @@ def __init__(
 
 
 DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS = {
-    (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=1e-5, rtol=0, agg_method="mean"),
-    (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(atol=1e-5, rtol=0, agg_method="mean"),
+    (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=0.9, rtol=0, agg_method="mean"),
+    (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(atol=255 * 0.9, rtol=0, agg_method="mean"),
 }
 
 CUDA_VS_CPU_SINGLE_PIXEL_DIFFERENCE = {
@@ -74,14 +75,26 @@ def __init__(
 
 def pil_reference_wrapper(pil_kernel):
     @functools.wraps(pil_kernel)
-    def wrapper(image_tensor, *other_args, **kwargs):
-        if image_tensor.ndim > 3:
+    def wrapper(input_tensor, *other_args, **kwargs):
+        if input_tensor.ndim > 3:
             raise pytest.UsageError(
-                f"Can only test single tensor images against PIL, but input has shape {image_tensor.shape}"
+                f"Can only test single tensor images against PIL, but input has shape {input_tensor.shape}"
             )
 
-        # We don't need to convert back to tensor here, since `assert_close` does that automatically.
-        return pil_kernel(F.to_image_pil(image_tensor), *other_args, **kwargs)
+        input_pil = F.to_image_pil(input_tensor)
+        output_pil = pil_kernel(input_pil, *other_args, **kwargs)
+        if not isinstance(output_pil, PIL.Image.Image):
+            return output_pil
+
+        output_tensor = F.convert_dtype_image_tensor(F.to_image_tensor(output_pil), dtype=input_tensor.dtype)
+
+        # 2D mask shenanigans
+        if output_tensor.ndim == 2 and input_tensor.ndim == 3:
+            output_tensor = output_tensor.unsqueeze(0)
+        elif output_tensor.ndim == 3 and input_tensor.ndim == 2:
+            output_tensor = output_tensor.squeeze(0)
+
+        return output_tensor
 
     return wrapper
 
@@ -400,6 +413,23 @@ def _full_affine_params(**partial_params):
 ]
 
 
+def _get_fills(*, num_channels, dtype, vector=True):
+    yield None
+
+    max_value = get_max_value(dtype)
+    # This intentionally gives us a float and an int scalar fill value
+    yield max_value / 2
+    yield max_value
+
+    if not vector:
+        return
+
+    if dtype.is_floating_point:
+        yield [0.1 + c / 10 for c in range(num_channels)]
+    else:
+        yield [12.0 + c for c in range(num_channels)]
+
+
 def sample_inputs_affine_image_tensor():
     make_affine_image_loaders = functools.partial(
         make_image_loaders, sizes=["random"], color_spaces=[features.ColorSpace.RGB], dtypes=[torch.float32]
@@ -409,10 +439,7 @@ def sample_inputs_affine_image_tensor():
         yield ArgsKwargs(image_loader, **affine_params)
 
     for image_loader in make_affine_image_loaders():
-        fills = [None, 0.5]
-        if image_loader.num_channels > 1:
-            fills.extend(vector_fill * image_loader.num_channels for vector_fill in [(0.5,), (1,), [0.5], [1]])
-        for fill in fills:
+        for fill in _get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
             yield ArgsKwargs(image_loader, **_full_affine_params(), fill=fill)
 
     for image_loader, interpolation in itertools.product(
@@ -631,7 +658,9 @@ def reference_inputs_convert_format_bounding_box():
 
 
 def sample_inputs_convert_color_space_image_tensor():
-    color_spaces = list(set(features.ColorSpace) - {features.ColorSpace.OTHER})
+    color_spaces = sorted(
+        set(features.ColorSpace) - {features.ColorSpace.OTHER}, key=lambda color_space: color_space.value
+    )
 
     for old_color_space, new_color_space in cycle_over(color_spaces):
         for image_loader in make_image_loaders(sizes=["random"], color_spaces=[old_color_space], constant_alpha=True):
@@ -678,7 +707,10 @@ def sample_inputs_convert_color_space_video():
             sample_inputs_fn=sample_inputs_convert_color_space_image_tensor,
             reference_fn=reference_convert_color_space_image_tensor,
             reference_inputs_fn=reference_inputs_convert_color_space_image_tensor,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+            closeness_kwargs={
+                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=2 / 255, rtol=0),
+                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(atol=1, rtol=0),
+            },
         ),
         KernelInfo(
             F.convert_color_space_video,
@@ -775,10 +807,7 @@ def sample_inputs_rotate_image_tensor():
         yield ArgsKwargs(image_loader, angle=15.0, center=center)
 
     for image_loader in make_rotate_image_loaders():
-        fills = [None, 0.5]
-        if image_loader.num_channels > 1:
-            fills.extend(vector_fill * image_loader.num_channels for vector_fill in [(0.5,), (1,), [0.5], [1]])
-        for fill in fills:
+        for fill in _get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
             yield ArgsKwargs(image_loader, angle=15.0, fill=fill)
 
     for image_loader, interpolation in itertools.product(
@@ -1062,10 +1091,7 @@ def sample_inputs_pad_image_tensor():
         yield ArgsKwargs(image_loader, padding=padding)
 
     for image_loader in make_pad_image_loaders():
-        fills = [None, 0.5]
-        if image_loader.num_channels > 1:
-            fills.extend(vector_fill * image_loader.num_channels for vector_fill in [(0.5,), (1,), [0.5], [1]])
-        for fill in fills:
+        for fill in _get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
             yield ArgsKwargs(image_loader, padding=[1], fill=fill)
 
     for image_loader, padding_mode in itertools.product(
@@ -1084,10 +1110,11 @@ def sample_inputs_pad_image_tensor():
 def reference_inputs_pad_image_tensor():
     for image_loader, params in itertools.product(make_image_loaders(extra_dims=[()]), _PAD_PARAMS):
         # FIXME: PIL kernel doesn't support sequences of length 1 if the number of channels is larger. Shouldn't it?
-        fills = [None, 128.0, 128]
-        if params["padding_mode"] == "constant":
-            fills.append([12.0 + c for c in range(image_loader.num_channels)])
-        for fill in fills:
+        for fill in _get_fills(
+            num_channels=image_loader.num_channels,
+            dtype=image_loader.dtype,
+            vector=params["padding_mode"] == "constant",
+        ):
             yield ArgsKwargs(image_loader, fill=fill, **params)
 
 
@@ -1110,8 +1137,10 @@ def sample_inputs_pad_mask():
 
 
 def reference_inputs_pad_mask():
-    for image_loader, fill, params in itertools.product(make_image_loaders(extra_dims=[()]), [None, 127], _PAD_PARAMS):
-        yield ArgsKwargs(image_loader, fill=fill, **params)
+    for mask_loader, fill, params in itertools.product(
+        make_mask_loaders(num_objects=[1], extra_dims=[()]), [None, 127], _PAD_PARAMS
+    ):
+        yield ArgsKwargs(mask_loader, fill=fill, **params)
 
 
 def sample_inputs_pad_video():
@@ -1197,14 +1226,14 @@ def reference_inputs_pad_bounding_box():
 
 def sample_inputs_perspective_image_tensor():
     for image_loader in make_image_loaders(sizes=["random"]):
-        for fill in [None, 128.0, 128, [12.0], [12.0 + c for c in range(image_loader.num_channels)]]:
+        for fill in _get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
             yield ArgsKwargs(image_loader, None, None, fill=fill, coefficients=_PERSPECTIVE_COEFFS[0])
 
 
 def reference_inputs_perspective_image_tensor():
     for image_loader, coefficients in itertools.product(make_image_loaders(extra_dims=[()]), _PERSPECTIVE_COEFFS):
         # FIXME: PIL kernel doesn't support sequences of length 1 if the number of channels is larger. Shouldn't it?
-        for fill in [None, 128.0, 128, [12.0 + c for c in range(image_loader.num_channels)]]:
+        for fill in _get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
             yield ArgsKwargs(image_loader, None, None, fill=fill, coefficients=coefficients)
 
 
@@ -1271,7 +1300,7 @@ def _get_elastic_displacement(spatial_size):
 def sample_inputs_elastic_image_tensor():
     for image_loader in make_image_loaders(sizes=["random"]):
         displacement = _get_elastic_displacement(image_loader.spatial_size)
-        for fill in [None, 128.0, 128, [12.0], [12.0 + c for c in range(image_loader.num_channels)]]:
+        for fill in _get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
             yield ArgsKwargs(image_loader, displacement=displacement, fill=fill)
 
 
@@ -1285,7 +1314,7 @@ def reference_inputs_elastic_image_tensor():
         ],
     ):
         displacement = _get_elastic_displacement(image_loader.spatial_size)
-        for fill in [None, 128.0, 128, [12.0], [12.0 + c for c in range(image_loader.num_channels)]]:
+        for fill in _get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
             yield ArgsKwargs(image_loader, interpolation=interpolation, displacement=displacement, fill=fill)
 
 
@@ -2070,6 +2099,17 @@ def sample_inputs_ten_crop_video():
         yield ArgsKwargs(video_loader, size=size)
 
 
+def multi_crop_pil_reference_wrapper(pil_kernel):
+    def wrapper(input_tensor, *other_args, **kwargs):
+        output = pil_reference_wrapper(pil_kernel)(input_tensor, *other_args, **kwargs)
+        return type(output)(
+            F.convert_dtype_image_tensor(F.to_image_tensor(output_pil), dtype=input_tensor.dtype)
+            for output_pil in output
+        )
+
+    return wrapper
+
+
 _common_five_ten_crop_marks = [
     xfail_jit_python_scalar_arg("size"),
     mark_framework_limitation(("TestKernels", "test_batched_vs_single"), "Custom batching needed."),
@@ -2080,7 +2120,7 @@ def sample_inputs_ten_crop_video():
         KernelInfo(
             F.five_crop_image_tensor,
             sample_inputs_fn=sample_inputs_five_crop_image_tensor,
-            reference_fn=pil_reference_wrapper(F.five_crop_image_pil),
+            reference_fn=multi_crop_pil_reference_wrapper(F.five_crop_image_pil),
             reference_inputs_fn=reference_inputs_five_crop_image_tensor,
             test_marks=_common_five_ten_crop_marks,
             closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
@@ -2093,7 +2133,7 @@ def sample_inputs_ten_crop_video():
         KernelInfo(
             F.ten_crop_image_tensor,
             sample_inputs_fn=sample_inputs_ten_crop_image_tensor,
-            reference_fn=pil_reference_wrapper(F.ten_crop_image_pil),
+            reference_fn=multi_crop_pil_reference_wrapper(F.ten_crop_image_pil),
             reference_inputs_fn=reference_inputs_ten_crop_image_tensor,
             test_marks=_common_five_ten_crop_marks,
             closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
diff --git a/test/test_prototype_transforms_consistency.py b/test/test_prototype_transforms_consistency.py
index d3100bc81d5..41a3bfbde65 100644
--- a/test/test_prototype_transforms_consistency.py
+++ b/test/test_prototype_transforms_consistency.py
@@ -1005,7 +1005,7 @@ def make_datapoints(self, supports_pil=True, image_dtype=torch.uint8):
 
             dp = (conv_fn(feature_image), feature_mask)
             dp_ref = (
-                to_image_pil(feature_image) if supports_pil else torch.Tensor(feature_image),
+                to_image_pil(feature_image) if supports_pil else feature_image.as_subclass(torch.Tensor),
                 to_image_pil(feature_mask),
             )
 
@@ -1019,12 +1019,16 @@ def check(self, t, t_ref, data_kwargs=None):
         for dp, dp_ref in self.make_datapoints(**data_kwargs or dict()):
 
             self.set_seed()
-            output = t(dp)
+            actual = actual_image, actual_mask = t(dp)
 
             self.set_seed()
-            expected_output = t_ref(*dp_ref)
+            expected_image, expected_mask = t_ref(*dp_ref)
+            if isinstance(actual_image, torch.Tensor) and not isinstance(expected_image, torch.Tensor):
+                expected_image = legacy_F.pil_to_tensor(expected_image)
+            expected_mask = legacy_F.pil_to_tensor(expected_mask).squeeze(0)
+            expected = (expected_image, expected_mask)
 
-            assert_equal(output, expected_output)
+            assert_equal(actual, expected)
 
     @pytest.mark.parametrize(
         ("t_ref", "t", "data_kwargs"),
diff --git a/test/test_prototype_transforms_functional.py b/test/test_prototype_transforms_functional.py
index f8fc8e5fd2f..31dc67effcf 100644
--- a/test/test_prototype_transforms_functional.py
+++ b/test/test_prototype_transforms_functional.py
@@ -237,7 +237,6 @@ def test_against_reference(self, test_id, info, args_kwargs):
         assert_close(
             actual,
             expected,
-            check_dtype=False,
             **info.get_closeness_kwargs(test_id, dtype=input.dtype, device=input.device),
         )
 

From 8ab25ff6ad37518671a616dd4c16069b661ef49b Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Thu, 10 Nov 2022 10:59:31 +0100
Subject: [PATCH 2/6] use individual tolerances

---
 test/prototype_transforms_kernel_infos.py    | 194 ++++++++++++++-----
 test/test_prototype_transforms_functional.py |  31 ++-
 2 files changed, 179 insertions(+), 46 deletions(-)

diff --git a/test/prototype_transforms_kernel_infos.py b/test/prototype_transforms_kernel_infos.py
index 308e7cfb0de..56701b9d97f 100644
--- a/test/prototype_transforms_kernel_infos.py
+++ b/test/prototype_transforms_kernel_infos.py
@@ -62,11 +62,6 @@ def __init__(
         self.reference_inputs_fn = reference_inputs_fn
 
 
-DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS = {
-    (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=0.9, rtol=0, agg_method="mean"),
-    (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(atol=255 * 0.9, rtol=0, agg_method="mean"),
-}
-
 CUDA_VS_CPU_SINGLE_PIXEL_DIFFERENCE = {
     (("TestKernels", "test_cuda_vs_cpu"), dtype, "cuda"): dict(atol=atol, rtol=0)
     for dtype, atol in [(torch.uint8, 1), (torch.float32, 1 / 255)]
@@ -193,7 +188,9 @@ def reference_inputs_flip_bounding_box():
             sample_inputs_fn=sample_inputs_horizontal_flip_image_tensor,
             reference_fn=pil_reference_wrapper(F.horizontal_flip_image_pil),
             reference_inputs_fn=reference_inputs_horizontal_flip_image_tensor,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+            closeness_kwargs={
+                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=1 / 255, rtol=0),
+            },
         ),
         KernelInfo(
             F.horizontal_flip_bounding_box,
@@ -338,7 +335,14 @@ def reference_inputs_resize_bounding_box():
             reference_fn=reference_resize_image_tensor,
             reference_inputs_fn=reference_inputs_resize_image_tensor,
             closeness_kwargs={
-                **DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+                # TODO: These tolerances are noticeably larger than "regular" ones. Investigate if this is a bug in the
+                #  kernel.
+                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(
+                    atol=110 / 255, rtol=0, agg_method="mean"
+                ),
+                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(
+                    atol=110, rtol=0, agg_method="mean"
+                ),
                 **CUDA_VS_CPU_SINGLE_PIXEL_DIFFERENCE,
             },
             test_marks=[
@@ -359,7 +363,9 @@ def reference_inputs_resize_bounding_box():
             sample_inputs_fn=sample_inputs_resize_mask,
             reference_fn=reference_resize_mask,
             reference_inputs_fn=reference_inputs_resize_mask,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+            closeness_kwargs={
+                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(atol=10, rtol=0),
+            },
             test_marks=[
                 xfail_jit_python_scalar_arg("size"),
             ],
@@ -591,7 +597,14 @@ def sample_inputs_affine_video():
             sample_inputs_fn=sample_inputs_affine_image_tensor,
             reference_fn=pil_reference_wrapper(F.affine_image_pil),
             reference_inputs_fn=reference_inputs_affine_image_tensor,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+            closeness_kwargs={
+                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(
+                    atol=10 / 255, rtol=0, agg_method="mean"
+                ),
+                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(
+                    atol=10, rtol=0, agg_method="mean"
+                ),
+            },
             test_marks=[
                 xfail_jit_python_scalar_arg("shear"),
                 xfail_jit_tuple_instead_of_list("fill"),
@@ -616,7 +629,11 @@ def sample_inputs_affine_video():
             sample_inputs_fn=sample_inputs_affine_mask,
             reference_fn=reference_affine_mask,
             reference_inputs_fn=reference_inputs_resize_mask,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+            closeness_kwargs={
+                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(
+                    atol=6, rtol=0, agg_method="mean"
+                ),
+            },
             test_marks=[
                 xfail_jit_python_scalar_arg("shear"),
             ],
@@ -771,7 +788,9 @@ def reference_vertical_flip_bounding_box(bounding_box, *, format, spatial_size):
             sample_inputs_fn=sample_inputs_vertical_flip_image_tensor,
             reference_fn=pil_reference_wrapper(F.vertical_flip_image_pil),
             reference_inputs_fn=reference_inputs_vertical_flip_image_tensor,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+            closeness_kwargs={
+                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=1 / 255, rtol=0),
+            },
         ),
         KernelInfo(
             F.vertical_flip_bounding_box,
@@ -859,7 +878,16 @@ def sample_inputs_rotate_video():
             sample_inputs_fn=sample_inputs_rotate_image_tensor,
             reference_fn=pil_reference_wrapper(F.rotate_image_pil),
             reference_inputs_fn=reference_inputs_rotate_image_tensor,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+            closeness_kwargs={
+                # TODO: These tolerances are noticeably larger than "regular" ones. Investigate if this is a bug in the
+                #  kernel.
+                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(
+                    atol=120 / 255, rtol=0, agg_method="mean"
+                ),
+                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(
+                    atol=90, rtol=0, agg_method="mean"
+                ),
+            },
             test_marks=[
                 xfail_jit_tuple_instead_of_list("fill"),
                 # TODO: check if this is a regression since it seems that should be supported if `int` is ok
@@ -875,7 +903,9 @@ def sample_inputs_rotate_video():
             sample_inputs_fn=sample_inputs_rotate_mask,
             reference_fn=reference_rotate_mask,
             reference_inputs_fn=reference_inputs_rotate_mask,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+            closeness_kwargs={
+                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(atol=8, rtol=0),
+            },
         ),
         KernelInfo(
             F.rotate_video,
@@ -957,7 +987,9 @@ def reference_inputs_crop_bounding_box():
             sample_inputs_fn=sample_inputs_crop_image_tensor,
             reference_fn=pil_reference_wrapper(F.crop_image_pil),
             reference_inputs_fn=reference_inputs_crop_image_tensor,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+            closeness_kwargs={
+                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=1 / 255, rtol=0),
+            },
         ),
         KernelInfo(
             F.crop_bounding_box,
@@ -970,7 +1002,6 @@ def reference_inputs_crop_bounding_box():
             sample_inputs_fn=sample_inputs_crop_mask,
             reference_fn=pil_reference_wrapper(F.crop_image_pil),
             reference_inputs_fn=reference_inputs_crop_mask,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
         ),
         KernelInfo(
             F.crop_video,
@@ -1050,7 +1081,14 @@ def sample_inputs_resized_crop_video():
             reference_fn=reference_resized_crop_image_tensor,
             reference_inputs_fn=reference_inputs_resized_crop_image_tensor,
             closeness_kwargs={
-                **DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+                # TODO: These tolerances are noticeably larger than "regular" ones. Investigate if this is a bug in the
+                #  kernel.
+                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(
+                    atol=60 / 255, rtol=0, agg_method="mean"
+                ),
+                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(
+                    atol=60, rtol=0, agg_method="mean"
+                ),
                 **CUDA_VS_CPU_SINGLE_PIXEL_DIFFERENCE,
             },
         ),
@@ -1063,7 +1101,9 @@ def sample_inputs_resized_crop_video():
             sample_inputs_fn=sample_inputs_resized_crop_mask,
             reference_fn=pil_reference_wrapper(F.resized_crop_image_pil),
             reference_inputs_fn=reference_inputs_resized_crop_mask,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+            closeness_kwargs={
+                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(atol=10, rtol=0),
+            },
         ),
         KernelInfo(
             F.resized_crop_video,
@@ -1187,7 +1227,12 @@ def reference_inputs_pad_bounding_box():
             sample_inputs_fn=sample_inputs_pad_image_tensor,
             reference_fn=pil_reference_wrapper(F.pad_image_pil),
             reference_inputs_fn=reference_inputs_pad_image_tensor,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+            closeness_kwargs={
+                # FIXME: This tolerance effectively renders the test useless since it cannot fail. We need to
+                #  investigate for what configuration this is happening. Since uint8 works perfectly there seems to be
+                #  a bug in the kernel.
+                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=1, rtol=0),
+            },
             test_marks=[
                 xfail_jit_tuple_instead_of_list("padding"),
                 xfail_jit_tuple_instead_of_list("fill"),
@@ -1209,7 +1254,6 @@ def reference_inputs_pad_bounding_box():
             sample_inputs_fn=sample_inputs_pad_mask,
             reference_fn=pil_reference_wrapper(F.pad_image_pil),
             reference_inputs_fn=reference_inputs_pad_mask,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
         ),
         KernelInfo(
             F.pad_video,
@@ -1269,7 +1313,14 @@ def sample_inputs_perspective_video():
             reference_fn=pil_reference_wrapper(F.perspective_image_pil),
             reference_inputs_fn=reference_inputs_perspective_image_tensor,
             closeness_kwargs={
-                **DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+                # TODO: This tolerance is noticeably larger than "regular" ones. Investigate if this is a bug in the
+                #  kernel.
+                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(
+                    atol=160 / 255, rtol=0, agg_method="mean"
+                ),
+                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(
+                    atol=160, rtol=0, agg_method="mean"
+                ),
                 **CUDA_VS_CPU_SINGLE_PIXEL_DIFFERENCE,
             },
         ),
@@ -1282,7 +1333,9 @@ def sample_inputs_perspective_video():
             sample_inputs_fn=sample_inputs_perspective_mask,
             reference_fn=pil_reference_wrapper(F.perspective_image_pil),
             reference_inputs_fn=reference_inputs_perspective_mask,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+            closeness_kwargs={
+                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(atol=10, rtol=0),
+            },
         ),
         KernelInfo(
             F.perspective_video,
@@ -1353,7 +1406,13 @@ def sample_inputs_elastic_video():
             sample_inputs_fn=sample_inputs_elastic_image_tensor,
             reference_fn=pil_reference_wrapper(F.elastic_image_pil),
             reference_inputs_fn=reference_inputs_elastic_image_tensor,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+            closeness_kwargs={
+                # TODO: This tolerance is noticeably larger than "regular" ones. Investigate if this is a bug in the
+                #  kernel for floating point inputs, given that uint8 works perfectly.
+                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(
+                    atol=160 / 255, rtol=0, agg_method="mean"
+                ),
+            },
         ),
         KernelInfo(
             F.elastic_bounding_box,
@@ -1364,7 +1423,11 @@ def sample_inputs_elastic_video():
             sample_inputs_fn=sample_inputs_elastic_mask,
             reference_fn=pil_reference_wrapper(F.elastic_image_pil),
             reference_inputs_fn=reference_inputs_elastic_mask,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+            closeness_kwargs={
+                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(
+                    atol=65, rtol=0, agg_method="mean"
+                ),
+            },
         ),
         KernelInfo(
             F.elastic_video,
@@ -1434,7 +1497,9 @@ def sample_inputs_center_crop_video():
             sample_inputs_fn=sample_inputs_center_crop_image_tensor,
             reference_fn=pil_reference_wrapper(F.center_crop_image_pil),
             reference_inputs_fn=reference_inputs_center_crop_image_tensor,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+            closeness_kwargs={
+                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=1 / 255, rtol=0),
+            },
             test_marks=[
                 xfail_jit_python_scalar_arg("output_size"),
             ],
@@ -1451,7 +1516,6 @@ def sample_inputs_center_crop_video():
             sample_inputs_fn=sample_inputs_center_crop_mask,
             reference_fn=pil_reference_wrapper(F.center_crop_image_pil),
             reference_inputs_fn=reference_inputs_center_crop_mask,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
             test_marks=[
                 xfail_jit_python_scalar_arg("output_size"),
             ],
@@ -1488,10 +1552,7 @@ def sample_inputs_gaussian_blur_video():
         KernelInfo(
             F.gaussian_blur_image_tensor,
             sample_inputs_fn=sample_inputs_gaussian_blur_image_tensor,
-            closeness_kwargs={
-                **DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
-                **CUDA_VS_CPU_SINGLE_PIXEL_DIFFERENCE,
-            },
+            closeness_kwargs=CUDA_VS_CPU_SINGLE_PIXEL_DIFFERENCE,
             test_marks=[
                 xfail_jit_python_scalar_arg("kernel_size"),
                 xfail_jit_python_scalar_arg("sigma"),
@@ -1580,7 +1641,11 @@ def sample_inputs_equalize_video():
             sample_inputs_fn=sample_inputs_equalize_image_tensor,
             reference_fn=pil_reference_wrapper(F.equalize_image_pil),
             reference_inputs_fn=reference_inputs_equalize_image_tensor,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+            closeness_kwargs={
+                # TODO: This tolerance is noticeably larger than "regular" ones. Investigate if this is a bug in the
+                #  kernel for floating point inputs, given that uint8 works perfectly.
+                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=12 / 255, rtol=0),
+            },
         ),
         KernelInfo(
             F.equalize_video,
@@ -1617,7 +1682,9 @@ def sample_inputs_invert_video():
             sample_inputs_fn=sample_inputs_invert_image_tensor,
             reference_fn=pil_reference_wrapper(F.invert_image_pil),
             reference_inputs_fn=reference_inputs_invert_image_tensor,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+            closeness_kwargs={
+                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=1 / 255, rtol=0),
+            },
         ),
         KernelInfo(
             F.invert_video,
@@ -1639,7 +1706,9 @@ def sample_inputs_posterize_image_tensor():
 
 def reference_inputs_posterize_image_tensor():
     for image_loader, bits in itertools.product(
-        make_image_loaders(color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB), extra_dims=[()]),
+        make_image_loaders(
+            color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB), extra_dims=[()], dtypes=[torch.uint8]
+        ),
         _POSTERIZE_BITS,
     ):
         yield ArgsKwargs(image_loader, bits=bits)
@@ -1658,7 +1727,6 @@ def sample_inputs_posterize_video():
             sample_inputs_fn=sample_inputs_posterize_image_tensor,
             reference_fn=pil_reference_wrapper(F.posterize_image_pil),
             reference_inputs_fn=reference_inputs_posterize_image_tensor,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
         ),
         KernelInfo(
             F.posterize_video,
@@ -1681,6 +1749,14 @@ def sample_inputs_solarize_image_tensor():
         yield ArgsKwargs(image_loader, threshold=next(_get_solarize_thresholds(image_loader.dtype)))
 
 
+def reference_solarize_image_tensor(image, threshold):
+    # The `pil_reference_wrapper` converts floating point tensor images into uint8 PIL images. In that case we also
+    # need to scale the threshold accordingly.
+    if image.dtype.is_floating_point:
+        threshold *= 255
+    return pil_reference_wrapper(F.solarize_image_pil)(image, threshold)
+
+
 def reference_inputs_solarize_image_tensor():
     for image_loader in make_image_loaders(
         color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB), extra_dims=[()]
@@ -1700,9 +1776,13 @@ def sample_inputs_solarize_video():
             F.solarize_image_tensor,
             kernel_name="solarize_image_tensor",
             sample_inputs_fn=sample_inputs_solarize_image_tensor,
-            reference_fn=pil_reference_wrapper(F.solarize_image_pil),
+            reference_fn=reference_solarize_image_tensor,
             reference_inputs_fn=reference_inputs_solarize_image_tensor,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+            closeness_kwargs={
+                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(
+                    atol=1 / 255, rtol=0, agg_method="mean"
+                ),
+            },
         ),
         KernelInfo(
             F.solarize_video,
@@ -1739,7 +1819,9 @@ def sample_inputs_autocontrast_video():
             sample_inputs_fn=sample_inputs_autocontrast_image_tensor,
             reference_fn=pil_reference_wrapper(F.autocontrast_image_pil),
             reference_inputs_fn=reference_inputs_autocontrast_image_tensor,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+            closeness_kwargs={
+                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=3 / 255, rtol=0),
+            },
         ),
         KernelInfo(
             F.autocontrast_video,
@@ -1780,7 +1862,9 @@ def sample_inputs_adjust_sharpness_video():
             sample_inputs_fn=sample_inputs_adjust_sharpness_image_tensor,
             reference_fn=pil_reference_wrapper(F.adjust_sharpness_image_pil),
             reference_inputs_fn=reference_inputs_adjust_sharpness_image_tensor,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+            closeness_kwargs={
+                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=3 / 255, rtol=0),
+            },
         ),
         KernelInfo(
             F.adjust_sharpness_video,
@@ -1851,7 +1935,9 @@ def sample_inputs_adjust_brightness_video():
             sample_inputs_fn=sample_inputs_adjust_brightness_image_tensor,
             reference_fn=pil_reference_wrapper(F.adjust_brightness_image_pil),
             reference_inputs_fn=reference_inputs_adjust_brightness_image_tensor,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+            closeness_kwargs={
+                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=2 / 255, rtol=0),
+            },
         ),
         KernelInfo(
             F.adjust_brightness_video,
@@ -1892,7 +1978,10 @@ def sample_inputs_adjust_contrast_video():
             sample_inputs_fn=sample_inputs_adjust_contrast_image_tensor,
             reference_fn=pil_reference_wrapper(F.adjust_contrast_image_pil),
             reference_inputs_fn=reference_inputs_adjust_contrast_image_tensor,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+            closeness_kwargs={
+                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=2 / 255, rtol=0),
+                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(atol=1, rtol=0),
+            },
         ),
         KernelInfo(
             F.adjust_contrast_video,
@@ -1937,7 +2026,12 @@ def sample_inputs_adjust_gamma_video():
             sample_inputs_fn=sample_inputs_adjust_gamma_image_tensor,
             reference_fn=pil_reference_wrapper(F.adjust_gamma_image_pil),
             reference_inputs_fn=reference_inputs_adjust_gamma_image_tensor,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+            closeness_kwargs={
+                # TODO: This tolerance is noticeably larger than "regular" ones. Investigate if this is a bug in the
+                #  kernel for floating point inputs, given that uint8 works almost perfectly.
+                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=40 / 255, rtol=0),
+                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(atol=1, rtol=0),
+            },
         ),
         KernelInfo(
             F.adjust_gamma_video,
@@ -1978,7 +2072,12 @@ def sample_inputs_adjust_hue_video():
             sample_inputs_fn=sample_inputs_adjust_hue_image_tensor,
             reference_fn=pil_reference_wrapper(F.adjust_hue_image_pil),
             reference_inputs_fn=reference_inputs_adjust_hue_image_tensor,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+            closeness_kwargs={
+                # TODO: These tolerances are noticeably larger than "regular" ones. Investigate if this is a bug in the
+                #  kernel.
+                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=20 / 255, rtol=0),
+                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(atol=20, rtol=0),
+            },
         ),
         KernelInfo(
             F.adjust_hue_video,
@@ -2018,7 +2117,10 @@ def sample_inputs_adjust_saturation_video():
             sample_inputs_fn=sample_inputs_adjust_saturation_image_tensor,
             reference_fn=pil_reference_wrapper(F.adjust_saturation_image_pil),
             reference_inputs_fn=reference_inputs_adjust_saturation_image_tensor,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+            closeness_kwargs={
+                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=3 / 255, rtol=0),
+                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(atol=1, rtol=0),
+            },
         ),
         KernelInfo(
             F.adjust_saturation_video,
@@ -2123,7 +2225,9 @@ def wrapper(input_tensor, *other_args, **kwargs):
             reference_fn=multi_crop_pil_reference_wrapper(F.five_crop_image_pil),
             reference_inputs_fn=reference_inputs_five_crop_image_tensor,
             test_marks=_common_five_ten_crop_marks,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+            closeness_kwargs={
+                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=1 / 255, rtol=0),
+            },
         ),
         KernelInfo(
             F.five_crop_video,
@@ -2136,7 +2240,9 @@ def wrapper(input_tensor, *other_args, **kwargs):
             reference_fn=multi_crop_pil_reference_wrapper(F.ten_crop_image_pil),
             reference_inputs_fn=reference_inputs_ten_crop_image_tensor,
             test_marks=_common_five_ten_crop_marks,
-            closeness_kwargs=DEFAULT_PIL_REFERENCE_CLOSENESS_KWARGS,
+            closeness_kwargs={
+                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=1 / 255, rtol=0),
+            },
         ),
         KernelInfo(
             F.ten_crop_video,
diff --git a/test/test_prototype_transforms_functional.py b/test/test_prototype_transforms_functional.py
index 31dc67effcf..13ed4f527e4 100644
--- a/test/test_prototype_transforms_functional.py
+++ b/test/test_prototype_transforms_functional.py
@@ -22,6 +22,10 @@
 from torchvision.transforms.functional import _get_perspective_coeffs
 
 
+KERNEL_INFOS_MAP = {info.kernel: info for info in KERNEL_INFOS}
+DISPATCHER_INFOS_MAP = {info.dispatcher: info for info in DISPATCHER_INFOS}
+
+
 @cache
 def script(fn):
     try:
@@ -420,12 +424,12 @@ def test_alias(alias, target):
 @pytest.mark.parametrize(
     ("info", "args_kwargs"),
     make_info_args_kwargs_params(
-        next(info for info in KERNEL_INFOS if info.kernel is F.convert_image_dtype),
+        KERNEL_INFOS_MAP[F.convert_dtype_image_tensor],
         args_kwargs_fn=lambda info: info.sample_inputs_fn(),
     ),
 )
 @pytest.mark.parametrize("device", cpu_and_gpu())
-def test_dtype_and_device_convert_image_dtype(info, args_kwargs, device):
+def test_convert_dtype_image_tensor_dtype_and_device(info, args_kwargs, device):
     (input, *other_args), kwargs = args_kwargs.load(device)
     dtype = other_args[0] if other_args else kwargs.get("dtype", torch.float32)
 
@@ -435,6 +439,29 @@ def test_dtype_and_device_convert_image_dtype(info, args_kwargs, device):
     assert output.device == input.device
 
 
+@pytest.mark.parametrize(
+    ("info", "args_kwargs"),
+    make_info_args_kwargs_params(
+        KERNEL_INFOS_MAP[F.posterize_image_tensor],
+        args_kwargs_fn=lambda info: info.reference_inputs_fn(),
+    ),
+)
+def test_posterize_image_tensor_float_vs_int(info, args_kwargs):
+    (input, *other_args), kwargs = args_kwargs.load("cpu")
+
+    actual = F.convert_dtype_image_tensor(
+        info.kernel(
+            F.convert_dtype_image_tensor(input, dtype=torch.float32),
+            *other_args,
+            **kwargs,
+        ),
+        dtype=input.dtype,
+    )
+    expected = info.kernel(input, *other_args, **kwargs)
+
+    assert_close(actual, expected, atol=1, rtol=0)
+
+
 # TODO: All correctness checks below this line should be ported to be references on a `KernelInfo` in
 #  `prototype_transforms_kernel_infos.py`
 

From 366551c15c6d259481f11b5309688598942bd0af Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Thu, 10 Nov 2022 15:48:16 +0100
Subject: [PATCH 3/6] refactor PIL reference test

---
 test/prototype_common_utils.py               |  30 ++
 test/prototype_transforms_kernel_infos.py    | 379 ++++++++++---------
 test/test_prototype_transforms_functional.py |  56 +--
 3 files changed, 258 insertions(+), 207 deletions(-)

diff --git a/test/prototype_common_utils.py b/test/prototype_common_utils.py
index 86ed5c22997..69a33aa3178 100644
--- a/test/prototype_common_utils.py
+++ b/test/prototype_common_utils.py
@@ -136,6 +136,29 @@ def assert_close(
 assert_equal = functools.partial(assert_close, rtol=0, atol=0)
 
 
+def parametrized_error_message(*args, **kwargs):
+    def to_str(obj):
+        if isinstance(obj, torch.Tensor) and obj.numel() > 10:
+            return f"tensor(shape={list(obj.shape)}, dtype={obj.dtype}, device={obj.device})"
+        else:
+            return repr(obj)
+
+    postfix = "\n".join(
+        [
+            "",
+            "Failure happened for the following parameters:",
+            "",
+            *[to_str(arg) for arg in args],
+            *[f"{name}={to_str(kwarg)}" for name, kwarg in kwargs.items()],
+        ]
+    )
+
+    def wrapper(msg):
+        return f"{msg}\n{postfix}"
+
+    return wrapper
+
+
 class ArgsKwargs:
     def __init__(self, *args, **kwargs):
         self.args = args
@@ -625,6 +648,13 @@ def get_marks(self, test_id, args_kwargs):
         ]
 
     def get_closeness_kwargs(self, test_id, *, dtype, device):
+        if not (isinstance(test_id, tuple) and len(test_id) == 2):
+            msg = "`test_id` should be a `Tuple[Optional[str], str]` denoting the test class and function name"
+            if callable(test_id):
+                msg += ". Did you forget to add the `test_id` fixture to parameters of the test?"
+            else:
+                msg += f", but got {test_id} instead."
+            raise pytest.UsageError(msg)
         if isinstance(device, torch.device):
             device = device.type
         return self.closeness_kwargs.get((test_id, dtype, device), dict())
diff --git a/test/prototype_transforms_kernel_infos.py b/test/prototype_transforms_kernel_infos.py
index 56701b9d97f..dafbf9abc97 100644
--- a/test/prototype_transforms_kernel_infos.py
+++ b/test/prototype_transforms_kernel_infos.py
@@ -50,6 +50,12 @@ def __init__(
         # These inputs are only used for the reference tests and thus can be comprehensive with regard to the parameter
         # values to be tested. If not specified, `sample_inputs_fn` will be used.
         reference_inputs_fn=None,
+        # If true-ish, triggers a test that checks the kernel for consistency between uint8 and float32 inputs with the
+        # reference inputs. This is usually used whenever we use a PIL kernel as reference.
+        # Can be a callable in which case it will be called with `other_args, kwargs`. It should return the same
+        # structure, but with adapted parameters. This is useful in case a parameter value is closely tied to the input
+        # dtype.
+        float32_vs_uint8=False,
         # See InfoBase
         test_marks=None,
         # See InfoBase
@@ -61,16 +67,45 @@ def __init__(
         self.reference_fn = reference_fn
         self.reference_inputs_fn = reference_inputs_fn
 
+        if float32_vs_uint8 and not callable(float32_vs_uint8):
+            float32_vs_uint8 = lambda other_args, kwargs: (other_args, kwargs)  # noqa: E731
+        self.float32_vs_uint8 = float32_vs_uint8
 
-CUDA_VS_CPU_SINGLE_PIXEL_DIFFERENCE = {
-    (("TestKernels", "test_cuda_vs_cpu"), dtype, "cuda"): dict(atol=atol, rtol=0)
-    for dtype, atol in [(torch.uint8, 1), (torch.float32, 1 / 255)]
-}
+
+def _pixel_difference_closeness_kwargs(uint8_atol, *, dtype=torch.uint8, agg_method=None):
+    return dict(atol=uint8_atol / 255 * get_max_value(dtype), rtol=0, agg_method=agg_method)
+
+
+def cuda_vs_cpu_pixel_difference(atol=1):
+    return {
+        (("TestKernels", "test_cuda_vs_cpu"), dtype, "cuda"): _pixel_difference_closeness_kwargs(atol, dtype=dtype)
+        for dtype in [torch.uint8, torch.float32]
+    }
+
+
+def pil_reference_pixel_difference(atol=1, agg_method=None):
+    return {
+        (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): _pixel_difference_closeness_kwargs(
+            atol, agg_method=agg_method
+        )
+    }
+
+
+def float32_vs_uint8_pixel_difference(atol=1, agg_method=None):
+    return {
+        (
+            ("TestKernels", "test_float32_vs_uint8"),
+            torch.float32,
+            "cpu",
+        ): _pixel_difference_closeness_kwargs(atol, dtype=torch.float32, agg_method=agg_method)
+    }
 
 
 def pil_reference_wrapper(pil_kernel):
     @functools.wraps(pil_kernel)
     def wrapper(input_tensor, *other_args, **kwargs):
+        if input_tensor.dtype != torch.uint8:
+            raise pytest.UsageError(f"Can only test uint8 tensor images against PIL, but input is {input_tensor.dtype}")
         if input_tensor.ndim > 3:
             raise pytest.UsageError(
                 f"Can only test single tensor images against PIL, but input has shape {input_tensor.shape}"
@@ -81,7 +116,7 @@ def wrapper(input_tensor, *other_args, **kwargs):
         if not isinstance(output_pil, PIL.Image.Image):
             return output_pil
 
-        output_tensor = F.convert_dtype_image_tensor(F.to_image_tensor(output_pil), dtype=input_tensor.dtype)
+        output_tensor = F.to_image_tensor(output_pil)
 
         # 2D mask shenanigans
         if output_tensor.ndim == 2 and input_tensor.ndim == 3:
@@ -134,7 +169,7 @@ def sample_inputs_horizontal_flip_image_tensor():
 
 
 def reference_inputs_horizontal_flip_image_tensor():
-    for image_loader in make_image_loaders(extra_dims=[()]):
+    for image_loader in make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]):
         yield ArgsKwargs(image_loader)
 
 
@@ -188,9 +223,7 @@ def reference_inputs_flip_bounding_box():
             sample_inputs_fn=sample_inputs_horizontal_flip_image_tensor,
             reference_fn=pil_reference_wrapper(F.horizontal_flip_image_pil),
             reference_inputs_fn=reference_inputs_horizontal_flip_image_tensor,
-            closeness_kwargs={
-                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=1 / 255, rtol=0),
-            },
+            float32_vs_uint8=True,
         ),
         KernelInfo(
             F.horizontal_flip_bounding_box,
@@ -254,7 +287,7 @@ def reference_resize_image_tensor(*args, **kwargs):
 
 def reference_inputs_resize_image_tensor():
     for image_loader, interpolation in itertools.product(
-        make_image_loaders(extra_dims=[()]),
+        make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]),
         [
             F.InterpolationMode.NEAREST,
             F.InterpolationMode.NEAREST_EXACT,
@@ -334,16 +367,13 @@ def reference_inputs_resize_bounding_box():
             sample_inputs_fn=sample_inputs_resize_image_tensor,
             reference_fn=reference_resize_image_tensor,
             reference_inputs_fn=reference_inputs_resize_image_tensor,
+            float32_vs_uint8=True,
             closeness_kwargs={
-                # TODO: These tolerances are noticeably larger than "regular" ones. Investigate if this is a bug in the
-                #  kernel.
-                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(
-                    atol=110 / 255, rtol=0, agg_method="mean"
-                ),
-                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(
-                    atol=110, rtol=0, agg_method="mean"
-                ),
-                **CUDA_VS_CPU_SINGLE_PIXEL_DIFFERENCE,
+                # TODO: This tolerance is noticeably larger than "regular" ones. Investigate if this is a kernel bug.
+                **pil_reference_pixel_difference(110, agg_method="mean"),
+                **cuda_vs_cpu_pixel_difference(),
+                # TODO: Investigate why float32 and uint8 results need a tolerance of 50 instead of the default 1.
+                **float32_vs_uint8_pixel_difference(50),
             },
             test_marks=[
                 xfail_jit_python_scalar_arg("size"),
@@ -363,9 +393,8 @@ def reference_inputs_resize_bounding_box():
             sample_inputs_fn=sample_inputs_resize_mask,
             reference_fn=reference_resize_mask,
             reference_inputs_fn=reference_inputs_resize_mask,
-            closeness_kwargs={
-                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(atol=10, rtol=0),
-            },
+            float32_vs_uint8=True,
+            closeness_kwargs=pil_reference_pixel_difference(10),
             test_marks=[
                 xfail_jit_python_scalar_arg("size"),
             ],
@@ -373,7 +402,7 @@ def reference_inputs_resize_bounding_box():
         KernelInfo(
             F.resize_video,
             sample_inputs_fn=sample_inputs_resize_video,
-            closeness_kwargs=CUDA_VS_CPU_SINGLE_PIXEL_DIFFERENCE,
+            closeness_kwargs=cuda_vs_cpu_pixel_difference(),
         ),
     ]
 )
@@ -419,7 +448,7 @@ def _full_affine_params(**partial_params):
 ]
 
 
-def _get_fills(*, num_channels, dtype, vector=True):
+def get_fills(*, num_channels, dtype, vector=True):
     yield None
 
     max_value = get_max_value(dtype)
@@ -436,6 +465,19 @@ def _get_fills(*, num_channels, dtype, vector=True):
         yield [12.0 + c for c in range(num_channels)]
 
 
+def float32_vs_uint8_fill_adapter(other_args, kwargs):
+    fill = kwargs.get("fill")
+    if fill is None:
+        return other_args, kwargs
+
+    if isinstance(fill, (int, float)):
+        fill /= 255
+    else:
+        fill = type(fill)(fill_ / 255 for fill_ in fill)
+
+    return other_args, dict(kwargs, fill=fill)
+
+
 def sample_inputs_affine_image_tensor():
     make_affine_image_loaders = functools.partial(
         make_image_loaders, sizes=["random"], color_spaces=[features.ColorSpace.RGB], dtypes=[torch.float32]
@@ -445,7 +487,7 @@ def sample_inputs_affine_image_tensor():
         yield ArgsKwargs(image_loader, **affine_params)
 
     for image_loader in make_affine_image_loaders():
-        for fill in _get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
+        for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
             yield ArgsKwargs(image_loader, **_full_affine_params(), fill=fill)
 
     for image_loader, interpolation in itertools.product(
@@ -459,7 +501,9 @@ def sample_inputs_affine_image_tensor():
 
 
 def reference_inputs_affine_image_tensor():
-    for image_loader, affine_kwargs in itertools.product(make_image_loaders(extra_dims=[()]), _AFFINE_KWARGS):
+    for image_loader, affine_kwargs in itertools.product(
+        make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]), _AFFINE_KWARGS
+    ):
         yield ArgsKwargs(
             image_loader,
             interpolation=F.InterpolationMode.NEAREST,
@@ -597,14 +641,8 @@ def sample_inputs_affine_video():
             sample_inputs_fn=sample_inputs_affine_image_tensor,
             reference_fn=pil_reference_wrapper(F.affine_image_pil),
             reference_inputs_fn=reference_inputs_affine_image_tensor,
-            closeness_kwargs={
-                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(
-                    atol=10 / 255, rtol=0, agg_method="mean"
-                ),
-                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(
-                    atol=10, rtol=0, agg_method="mean"
-                ),
-            },
+            float32_vs_uint8=True,
+            closeness_kwargs=pil_reference_pixel_difference(10, agg_method="mean"),
             test_marks=[
                 xfail_jit_python_scalar_arg("shear"),
                 xfail_jit_tuple_instead_of_list("fill"),
@@ -629,11 +667,8 @@ def sample_inputs_affine_video():
             sample_inputs_fn=sample_inputs_affine_mask,
             reference_fn=reference_affine_mask,
             reference_inputs_fn=reference_inputs_resize_mask,
-            closeness_kwargs={
-                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(
-                    atol=6, rtol=0, agg_method="mean"
-                ),
-            },
+            closeness_kwargs=pil_reference_pixel_difference(10),
+            float32_vs_uint8=True,
             test_marks=[
                 xfail_jit_python_scalar_arg("shear"),
             ],
@@ -705,7 +740,7 @@ def reference_convert_color_space_image_tensor(image_pil, old_color_space, new_c
 def reference_inputs_convert_color_space_image_tensor():
     for args_kwargs in sample_inputs_convert_color_space_image_tensor():
         (image_loader, *other_args), kwargs = args_kwargs
-        if len(image_loader.shape) == 3:
+        if len(image_loader.shape) == 3 and image_loader.dtype == torch.uint8:
             yield args_kwargs
 
 
@@ -725,8 +760,8 @@ def sample_inputs_convert_color_space_video():
             reference_fn=reference_convert_color_space_image_tensor,
             reference_inputs_fn=reference_inputs_convert_color_space_image_tensor,
             closeness_kwargs={
-                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=2 / 255, rtol=0),
-                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(atol=1, rtol=0),
+                **pil_reference_pixel_difference(),
+                **float32_vs_uint8_pixel_difference(),
             },
         ),
         KernelInfo(
@@ -743,7 +778,7 @@ def sample_inputs_vertical_flip_image_tensor():
 
 
 def reference_inputs_vertical_flip_image_tensor():
-    for image_loader in make_image_loaders(extra_dims=[()]):
+    for image_loader in make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]):
         yield ArgsKwargs(image_loader)
 
 
@@ -788,9 +823,7 @@ def reference_vertical_flip_bounding_box(bounding_box, *, format, spatial_size):
             sample_inputs_fn=sample_inputs_vertical_flip_image_tensor,
             reference_fn=pil_reference_wrapper(F.vertical_flip_image_pil),
             reference_inputs_fn=reference_inputs_vertical_flip_image_tensor,
-            closeness_kwargs={
-                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=1 / 255, rtol=0),
-            },
+            float32_vs_uint8=True,
         ),
         KernelInfo(
             F.vertical_flip_bounding_box,
@@ -826,7 +859,7 @@ def sample_inputs_rotate_image_tensor():
         yield ArgsKwargs(image_loader, angle=15.0, center=center)
 
     for image_loader in make_rotate_image_loaders():
-        for fill in _get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
+        for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
             yield ArgsKwargs(image_loader, angle=15.0, fill=fill)
 
     for image_loader, interpolation in itertools.product(
@@ -837,7 +870,9 @@ def sample_inputs_rotate_image_tensor():
 
 
 def reference_inputs_rotate_image_tensor():
-    for image_loader, angle in itertools.product(make_image_loaders(extra_dims=[()]), _ROTATE_ANGLES):
+    for image_loader, angle in itertools.product(
+        make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]), _ROTATE_ANGLES
+    ):
         yield ArgsKwargs(image_loader, angle=angle)
 
 
@@ -878,16 +913,9 @@ def sample_inputs_rotate_video():
             sample_inputs_fn=sample_inputs_rotate_image_tensor,
             reference_fn=pil_reference_wrapper(F.rotate_image_pil),
             reference_inputs_fn=reference_inputs_rotate_image_tensor,
-            closeness_kwargs={
-                # TODO: These tolerances are noticeably larger than "regular" ones. Investigate if this is a bug in the
-                #  kernel.
-                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(
-                    atol=120 / 255, rtol=0, agg_method="mean"
-                ),
-                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(
-                    atol=90, rtol=0, agg_method="mean"
-                ),
-            },
+            float32_vs_uint8=True,
+            # TODO: This tolerance is noticeably larger than "regular" ones. Investigate if this is a kernel bug.
+            closeness_kwargs=pil_reference_pixel_difference(100, agg_method="mean"),
             test_marks=[
                 xfail_jit_tuple_instead_of_list("fill"),
                 # TODO: check if this is a regression since it seems that should be supported if `int` is ok
@@ -903,9 +931,8 @@ def sample_inputs_rotate_video():
             sample_inputs_fn=sample_inputs_rotate_mask,
             reference_fn=reference_rotate_mask,
             reference_inputs_fn=reference_inputs_rotate_mask,
-            closeness_kwargs={
-                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(atol=8, rtol=0),
-            },
+            float32_vs_uint8=True,
+            closeness_kwargs=pil_reference_pixel_difference(10),
         ),
         KernelInfo(
             F.rotate_video,
@@ -932,7 +959,9 @@ def sample_inputs_crop_image_tensor():
 
 
 def reference_inputs_crop_image_tensor():
-    for image_loader, params in itertools.product(make_image_loaders(extra_dims=[()]), _CROP_PARAMS):
+    for image_loader, params in itertools.product(
+        make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]), _CROP_PARAMS
+    ):
         yield ArgsKwargs(image_loader, **params)
 
 
@@ -987,9 +1016,7 @@ def reference_inputs_crop_bounding_box():
             sample_inputs_fn=sample_inputs_crop_image_tensor,
             reference_fn=pil_reference_wrapper(F.crop_image_pil),
             reference_inputs_fn=reference_inputs_crop_image_tensor,
-            closeness_kwargs={
-                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=1 / 255, rtol=0),
-            },
+            float32_vs_uint8=True,
         ),
         KernelInfo(
             F.crop_bounding_box,
@@ -1002,6 +1029,7 @@ def reference_inputs_crop_bounding_box():
             sample_inputs_fn=sample_inputs_crop_mask,
             reference_fn=pil_reference_wrapper(F.crop_image_pil),
             reference_inputs_fn=reference_inputs_crop_mask,
+            float32_vs_uint8=True,
         ),
         KernelInfo(
             F.crop_video,
@@ -1030,7 +1058,7 @@ def reference_resized_crop_image_tensor(*args, **kwargs):
 
 def reference_inputs_resized_crop_image_tensor():
     for image_loader, interpolation, params in itertools.product(
-        make_image_loaders(extra_dims=[()]),
+        make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]),
         [
             F.InterpolationMode.NEAREST,
             F.InterpolationMode.NEAREST_EXACT,
@@ -1080,16 +1108,13 @@ def sample_inputs_resized_crop_video():
             sample_inputs_fn=sample_inputs_resized_crop_image_tensor,
             reference_fn=reference_resized_crop_image_tensor,
             reference_inputs_fn=reference_inputs_resized_crop_image_tensor,
+            float32_vs_uint8=True,
             closeness_kwargs={
-                # TODO: These tolerances are noticeably larger than "regular" ones. Investigate if this is a bug in the
-                #  kernel.
-                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(
-                    atol=60 / 255, rtol=0, agg_method="mean"
-                ),
-                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(
-                    atol=60, rtol=0, agg_method="mean"
-                ),
-                **CUDA_VS_CPU_SINGLE_PIXEL_DIFFERENCE,
+                # TODO: investigate why the tolerance needs to be this large
+                **pil_reference_pixel_difference(60, agg_method="mean"),
+                **cuda_vs_cpu_pixel_difference(),
+                # TODO: investigate why the tolerance needs to be this large
+                **float32_vs_uint8_pixel_difference(50),
             },
         ),
         KernelInfo(
@@ -1101,14 +1126,13 @@ def sample_inputs_resized_crop_video():
             sample_inputs_fn=sample_inputs_resized_crop_mask,
             reference_fn=pil_reference_wrapper(F.resized_crop_image_pil),
             reference_inputs_fn=reference_inputs_resized_crop_mask,
-            closeness_kwargs={
-                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(atol=10, rtol=0),
-            },
+            float32_vs_uint8=True,
+            closeness_kwargs=pil_reference_pixel_difference(10),
         ),
         KernelInfo(
             F.resized_crop_video,
             sample_inputs_fn=sample_inputs_resized_crop_video,
-            closeness_kwargs=CUDA_VS_CPU_SINGLE_PIXEL_DIFFERENCE,
+            closeness_kwargs=cuda_vs_cpu_pixel_difference(),
         ),
     ]
 )
@@ -1131,7 +1155,7 @@ def sample_inputs_pad_image_tensor():
         yield ArgsKwargs(image_loader, padding=padding)
 
     for image_loader in make_pad_image_loaders():
-        for fill in _get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
+        for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
             yield ArgsKwargs(image_loader, padding=[1], fill=fill)
 
     for image_loader, padding_mode in itertools.product(
@@ -1148,9 +1172,11 @@ def sample_inputs_pad_image_tensor():
 
 
 def reference_inputs_pad_image_tensor():
-    for image_loader, params in itertools.product(make_image_loaders(extra_dims=[()]), _PAD_PARAMS):
+    for image_loader, params in itertools.product(
+        make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]), _PAD_PARAMS
+    ):
         # FIXME: PIL kernel doesn't support sequences of length 1 if the number of channels is larger. Shouldn't it?
-        for fill in _get_fills(
+        for fill in get_fills(
             num_channels=image_loader.num_channels,
             dtype=image_loader.dtype,
             vector=params["padding_mode"] == "constant",
@@ -1227,12 +1253,8 @@ def reference_inputs_pad_bounding_box():
             sample_inputs_fn=sample_inputs_pad_image_tensor,
             reference_fn=pil_reference_wrapper(F.pad_image_pil),
             reference_inputs_fn=reference_inputs_pad_image_tensor,
-            closeness_kwargs={
-                # FIXME: This tolerance effectively renders the test useless since it cannot fail. We need to
-                #  investigate for what configuration this is happening. Since uint8 works perfectly there seems to be
-                #  a bug in the kernel.
-                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=1, rtol=0),
-            },
+            float32_vs_uint8=float32_vs_uint8_fill_adapter,
+            closeness_kwargs=float32_vs_uint8_pixel_difference(),
             test_marks=[
                 xfail_jit_tuple_instead_of_list("padding"),
                 xfail_jit_tuple_instead_of_list("fill"),
@@ -1254,6 +1276,7 @@ def reference_inputs_pad_bounding_box():
             sample_inputs_fn=sample_inputs_pad_mask,
             reference_fn=pil_reference_wrapper(F.pad_image_pil),
             reference_inputs_fn=reference_inputs_pad_mask,
+            float32_vs_uint8=float32_vs_uint8_fill_adapter,
         ),
         KernelInfo(
             F.pad_video,
@@ -1270,14 +1293,16 @@ def reference_inputs_pad_bounding_box():
 
 def sample_inputs_perspective_image_tensor():
     for image_loader in make_image_loaders(sizes=["random"]):
-        for fill in _get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
+        for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
             yield ArgsKwargs(image_loader, None, None, fill=fill, coefficients=_PERSPECTIVE_COEFFS[0])
 
 
 def reference_inputs_perspective_image_tensor():
-    for image_loader, coefficients in itertools.product(make_image_loaders(extra_dims=[()]), _PERSPECTIVE_COEFFS):
+    for image_loader, coefficients in itertools.product(
+        make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]), _PERSPECTIVE_COEFFS
+    ):
         # FIXME: PIL kernel doesn't support sequences of length 1 if the number of channels is larger. Shouldn't it?
-        for fill in _get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
+        for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
             yield ArgsKwargs(image_loader, None, None, fill=fill, coefficients=coefficients)
 
 
@@ -1312,16 +1337,12 @@ def sample_inputs_perspective_video():
             sample_inputs_fn=sample_inputs_perspective_image_tensor,
             reference_fn=pil_reference_wrapper(F.perspective_image_pil),
             reference_inputs_fn=reference_inputs_perspective_image_tensor,
+            float32_vs_uint8=float32_vs_uint8_fill_adapter,
             closeness_kwargs={
-                # TODO: This tolerance is noticeably larger than "regular" ones. Investigate if this is a bug in the
-                #  kernel.
-                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(
-                    atol=160 / 255, rtol=0, agg_method="mean"
-                ),
-                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(
-                    atol=160, rtol=0, agg_method="mean"
-                ),
-                **CUDA_VS_CPU_SINGLE_PIXEL_DIFFERENCE,
+                # TODO: investigate why the tolerance needs to be this large
+                **pil_reference_pixel_difference(160, agg_method="mean"),
+                **cuda_vs_cpu_pixel_difference(),
+                **float32_vs_uint8_pixel_difference(),
             },
         ),
         KernelInfo(
@@ -1333,6 +1354,7 @@ def sample_inputs_perspective_video():
             sample_inputs_fn=sample_inputs_perspective_mask,
             reference_fn=pil_reference_wrapper(F.perspective_image_pil),
             reference_inputs_fn=reference_inputs_perspective_mask,
+            float32_vs_uint8=True,
             closeness_kwargs={
                 (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(atol=10, rtol=0),
             },
@@ -1340,7 +1362,7 @@ def sample_inputs_perspective_video():
         KernelInfo(
             F.perspective_video,
             sample_inputs_fn=sample_inputs_perspective_video,
-            closeness_kwargs=CUDA_VS_CPU_SINGLE_PIXEL_DIFFERENCE,
+            closeness_kwargs=cuda_vs_cpu_pixel_difference(),
         ),
     ]
 )
@@ -1353,13 +1375,13 @@ def _get_elastic_displacement(spatial_size):
 def sample_inputs_elastic_image_tensor():
     for image_loader in make_image_loaders(sizes=["random"]):
         displacement = _get_elastic_displacement(image_loader.spatial_size)
-        for fill in _get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
+        for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
             yield ArgsKwargs(image_loader, displacement=displacement, fill=fill)
 
 
 def reference_inputs_elastic_image_tensor():
     for image_loader, interpolation in itertools.product(
-        make_image_loaders(extra_dims=[()]),
+        make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]),
         [
             F.InterpolationMode.NEAREST,
             F.InterpolationMode.BILINEAR,
@@ -1367,7 +1389,7 @@ def reference_inputs_elastic_image_tensor():
         ],
     ):
         displacement = _get_elastic_displacement(image_loader.spatial_size)
-        for fill in _get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
+        for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
             yield ArgsKwargs(image_loader, interpolation=interpolation, displacement=displacement, fill=fill)
 
 
@@ -1406,13 +1428,9 @@ def sample_inputs_elastic_video():
             sample_inputs_fn=sample_inputs_elastic_image_tensor,
             reference_fn=pil_reference_wrapper(F.elastic_image_pil),
             reference_inputs_fn=reference_inputs_elastic_image_tensor,
-            closeness_kwargs={
-                # TODO: This tolerance is noticeably larger than "regular" ones. Investigate if this is a bug in the
-                #  kernel for floating point inputs, given that uint8 works perfectly.
-                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(
-                    atol=160 / 255, rtol=0, agg_method="mean"
-                ),
-            },
+            float32_vs_uint8=float32_vs_uint8_fill_adapter,
+            # TODO: investigate why the tolerance needs to be this large
+            closeness_kwargs=float32_vs_uint8_pixel_difference(60, agg_method="mean"),
         ),
         KernelInfo(
             F.elastic_bounding_box,
@@ -1423,11 +1441,9 @@ def sample_inputs_elastic_video():
             sample_inputs_fn=sample_inputs_elastic_mask,
             reference_fn=pil_reference_wrapper(F.elastic_image_pil),
             reference_inputs_fn=reference_inputs_elastic_mask,
-            closeness_kwargs={
-                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(
-                    atol=65, rtol=0, agg_method="mean"
-                ),
-            },
+            float32_vs_uint8=True,
+            # TODO: investigate why the tolerance needs to be this large
+            closeness_kwargs=pil_reference_pixel_difference(65, agg_method="mean"),
         ),
         KernelInfo(
             F.elastic_video,
@@ -1456,7 +1472,8 @@ def sample_inputs_center_crop_image_tensor():
 
 def reference_inputs_center_crop_image_tensor():
     for image_loader, output_size in itertools.product(
-        make_image_loaders(sizes=_CENTER_CROP_SPATIAL_SIZES, extra_dims=[()]), _CENTER_CROP_OUTPUT_SIZES
+        make_image_loaders(sizes=_CENTER_CROP_SPATIAL_SIZES, extra_dims=[()], dtypes=[torch.uint8]),
+        _CENTER_CROP_OUTPUT_SIZES,
     ):
         yield ArgsKwargs(image_loader, output_size=output_size)
 
@@ -1497,9 +1514,7 @@ def sample_inputs_center_crop_video():
             sample_inputs_fn=sample_inputs_center_crop_image_tensor,
             reference_fn=pil_reference_wrapper(F.center_crop_image_pil),
             reference_inputs_fn=reference_inputs_center_crop_image_tensor,
-            closeness_kwargs={
-                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=1 / 255, rtol=0),
-            },
+            float32_vs_uint8=True,
             test_marks=[
                 xfail_jit_python_scalar_arg("output_size"),
             ],
@@ -1516,6 +1531,7 @@ def sample_inputs_center_crop_video():
             sample_inputs_fn=sample_inputs_center_crop_mask,
             reference_fn=pil_reference_wrapper(F.center_crop_image_pil),
             reference_inputs_fn=reference_inputs_center_crop_mask,
+            float32_vs_uint8=True,
             test_marks=[
                 xfail_jit_python_scalar_arg("output_size"),
             ],
@@ -1552,7 +1568,7 @@ def sample_inputs_gaussian_blur_video():
         KernelInfo(
             F.gaussian_blur_image_tensor,
             sample_inputs_fn=sample_inputs_gaussian_blur_image_tensor,
-            closeness_kwargs=CUDA_VS_CPU_SINGLE_PIXEL_DIFFERENCE,
+            closeness_kwargs=cuda_vs_cpu_pixel_difference(),
             test_marks=[
                 xfail_jit_python_scalar_arg("kernel_size"),
                 xfail_jit_python_scalar_arg("sigma"),
@@ -1561,7 +1577,7 @@ def sample_inputs_gaussian_blur_video():
         KernelInfo(
             F.gaussian_blur_video,
             sample_inputs_fn=sample_inputs_gaussian_blur_video,
-            closeness_kwargs=CUDA_VS_CPU_SINGLE_PIXEL_DIFFERENCE,
+            closeness_kwargs=cuda_vs_cpu_pixel_difference(),
         ),
     ]
 )
@@ -1596,7 +1612,7 @@ def make_beta_distributed_image(shape, dtype, device, *, alpha, beta):
 
     spatial_size = (256, 256)
     for dtype, color_space, fn in itertools.product(
-        [torch.uint8, torch.float32],
+        [torch.uint8],
         [features.ColorSpace.GRAY, features.ColorSpace.RGB],
         [
             lambda shape, dtype, device: torch.zeros(shape, dtype=dtype, device=device),
@@ -1640,12 +1656,8 @@ def sample_inputs_equalize_video():
             kernel_name="equalize_image_tensor",
             sample_inputs_fn=sample_inputs_equalize_image_tensor,
             reference_fn=pil_reference_wrapper(F.equalize_image_pil),
+            float32_vs_uint8=True,
             reference_inputs_fn=reference_inputs_equalize_image_tensor,
-            closeness_kwargs={
-                # TODO: This tolerance is noticeably larger than "regular" ones. Investigate if this is a bug in the
-                #  kernel for floating point inputs, given that uint8 works perfectly.
-                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=12 / 255, rtol=0),
-            },
         ),
         KernelInfo(
             F.equalize_video,
@@ -1664,7 +1676,7 @@ def sample_inputs_invert_image_tensor():
 
 def reference_inputs_invert_image_tensor():
     for image_loader in make_image_loaders(
-        color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB), extra_dims=[()]
+        color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB), extra_dims=[()], dtypes=[torch.uint8]
     ):
         yield ArgsKwargs(image_loader)
 
@@ -1682,9 +1694,7 @@ def sample_inputs_invert_video():
             sample_inputs_fn=sample_inputs_invert_image_tensor,
             reference_fn=pil_reference_wrapper(F.invert_image_pil),
             reference_inputs_fn=reference_inputs_invert_image_tensor,
-            closeness_kwargs={
-                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=1 / 255, rtol=0),
-            },
+            float32_vs_uint8=True,
         ),
         KernelInfo(
             F.invert_video,
@@ -1727,6 +1737,8 @@ def sample_inputs_posterize_video():
             sample_inputs_fn=sample_inputs_posterize_image_tensor,
             reference_fn=pil_reference_wrapper(F.posterize_image_pil),
             reference_inputs_fn=reference_inputs_posterize_image_tensor,
+            float32_vs_uint8=True,
+            closeness_kwargs=float32_vs_uint8_pixel_difference(),
         ),
         KernelInfo(
             F.posterize_video,
@@ -1749,22 +1761,18 @@ def sample_inputs_solarize_image_tensor():
         yield ArgsKwargs(image_loader, threshold=next(_get_solarize_thresholds(image_loader.dtype)))
 
 
-def reference_solarize_image_tensor(image, threshold):
-    # The `pil_reference_wrapper` converts floating point tensor images into uint8 PIL images. In that case we also
-    # need to scale the threshold accordingly.
-    if image.dtype.is_floating_point:
-        threshold *= 255
-    return pil_reference_wrapper(F.solarize_image_pil)(image, threshold)
-
-
 def reference_inputs_solarize_image_tensor():
     for image_loader in make_image_loaders(
-        color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB), extra_dims=[()]
+        color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB), extra_dims=[()], dtypes=[torch.uint8]
     ):
         for threshold in _get_solarize_thresholds(image_loader.dtype):
             yield ArgsKwargs(image_loader, threshold=threshold)
 
 
+def uint8_to_float32_threshold_adapter(other_args, kwargs):  # adapter passed as `float32_vs_uint8=` for solarize
+    return other_args, dict(threshold=kwargs["threshold"] / 255)  # scale the uint8 threshold down by 255 for float32
+
+
 def sample_inputs_solarize_video():
     for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
         yield ArgsKwargs(video_loader, threshold=next(_get_solarize_thresholds(video_loader.dtype)))
@@ -1776,13 +1784,10 @@ def sample_inputs_solarize_video():
             F.solarize_image_tensor,
             kernel_name="solarize_image_tensor",
             sample_inputs_fn=sample_inputs_solarize_image_tensor,
-            reference_fn=reference_solarize_image_tensor,
+            reference_fn=pil_reference_wrapper(F.solarize_image_pil),
             reference_inputs_fn=reference_inputs_solarize_image_tensor,
-            closeness_kwargs={
-                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(
-                    atol=1 / 255, rtol=0, agg_method="mean"
-                ),
-            },
+            float32_vs_uint8=uint8_to_float32_threshold_adapter,
+            closeness_kwargs=float32_vs_uint8_pixel_difference(),
         ),
         KernelInfo(
             F.solarize_video,
@@ -1801,7 +1806,7 @@ def sample_inputs_autocontrast_image_tensor():
 
 def reference_inputs_autocontrast_image_tensor():
     for image_loader in make_image_loaders(
-        color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB), extra_dims=[()]
+        color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB), extra_dims=[()], dtypes=[torch.uint8]
     ):
         yield ArgsKwargs(image_loader)
 
@@ -1819,9 +1824,8 @@ def sample_inputs_autocontrast_video():
             sample_inputs_fn=sample_inputs_autocontrast_image_tensor,
             reference_fn=pil_reference_wrapper(F.autocontrast_image_pil),
             reference_inputs_fn=reference_inputs_autocontrast_image_tensor,
-            closeness_kwargs={
-                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=3 / 255, rtol=0),
-            },
+            float32_vs_uint8=True,
+            closeness_kwargs=float32_vs_uint8_pixel_difference(),
         ),
         KernelInfo(
             F.autocontrast_video,
@@ -1843,7 +1847,9 @@ def sample_inputs_adjust_sharpness_image_tensor():
 
 def reference_inputs_adjust_sharpness_image_tensor():
     for image_loader, sharpness_factor in itertools.product(
-        make_image_loaders(color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB), extra_dims=[()]),
+        make_image_loaders(
+            color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB), extra_dims=[()], dtypes=[torch.uint8]
+        ),
         _ADJUST_SHARPNESS_FACTORS,
     ):
         yield ArgsKwargs(image_loader, sharpness_factor=sharpness_factor)
@@ -1862,9 +1868,8 @@ def sample_inputs_adjust_sharpness_video():
             sample_inputs_fn=sample_inputs_adjust_sharpness_image_tensor,
             reference_fn=pil_reference_wrapper(F.adjust_sharpness_image_pil),
             reference_inputs_fn=reference_inputs_adjust_sharpness_image_tensor,
-            closeness_kwargs={
-                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=3 / 255, rtol=0),
-            },
+            float32_vs_uint8=True,
+            closeness_kwargs=float32_vs_uint8_pixel_difference(2),
         ),
         KernelInfo(
             F.adjust_sharpness_video,
@@ -1916,7 +1921,9 @@ def sample_inputs_adjust_brightness_image_tensor():
 
 def reference_inputs_adjust_brightness_image_tensor():
     for image_loader, brightness_factor in itertools.product(
-        make_image_loaders(color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB), extra_dims=[()]),
+        make_image_loaders(
+            color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB), extra_dims=[()], dtypes=[torch.uint8]
+        ),
         _ADJUST_BRIGHTNESS_FACTORS,
     ):
         yield ArgsKwargs(image_loader, brightness_factor=brightness_factor)
@@ -1935,9 +1942,8 @@ def sample_inputs_adjust_brightness_video():
             sample_inputs_fn=sample_inputs_adjust_brightness_image_tensor,
             reference_fn=pil_reference_wrapper(F.adjust_brightness_image_pil),
             reference_inputs_fn=reference_inputs_adjust_brightness_image_tensor,
-            closeness_kwargs={
-                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=2 / 255, rtol=0),
-            },
+            float32_vs_uint8=True,
+            closeness_kwargs=float32_vs_uint8_pixel_difference(),
         ),
         KernelInfo(
             F.adjust_brightness_video,
@@ -1959,7 +1965,9 @@ def sample_inputs_adjust_contrast_image_tensor():
 
 def reference_inputs_adjust_contrast_image_tensor():
     for image_loader, contrast_factor in itertools.product(
-        make_image_loaders(color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB), extra_dims=[()]),
+        make_image_loaders(
+            color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB), extra_dims=[()], dtypes=[torch.uint8]
+        ),
         _ADJUST_CONTRAST_FACTORS,
     ):
         yield ArgsKwargs(image_loader, contrast_factor=contrast_factor)
@@ -1978,9 +1986,10 @@ def sample_inputs_adjust_contrast_video():
             sample_inputs_fn=sample_inputs_adjust_contrast_image_tensor,
             reference_fn=pil_reference_wrapper(F.adjust_contrast_image_pil),
             reference_inputs_fn=reference_inputs_adjust_contrast_image_tensor,
+            float32_vs_uint8=True,
             closeness_kwargs={
-                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=2 / 255, rtol=0),
-                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(atol=1, rtol=0),
+                **pil_reference_pixel_difference(),
+                **float32_vs_uint8_pixel_difference(2),
             },
         ),
         KernelInfo(
@@ -2006,7 +2015,9 @@ def sample_inputs_adjust_gamma_image_tensor():
 
 def reference_inputs_adjust_gamma_image_tensor():
     for image_loader, (gamma, gain) in itertools.product(
-        make_image_loaders(color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB), extra_dims=[()]),
+        make_image_loaders(
+            color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB), extra_dims=[()], dtypes=[torch.uint8]
+        ),
         _ADJUST_GAMMA_GAMMAS_GAINS,
     ):
         yield ArgsKwargs(image_loader, gamma=gamma, gain=gain)
@@ -2026,11 +2037,10 @@ def sample_inputs_adjust_gamma_video():
             sample_inputs_fn=sample_inputs_adjust_gamma_image_tensor,
             reference_fn=pil_reference_wrapper(F.adjust_gamma_image_pil),
             reference_inputs_fn=reference_inputs_adjust_gamma_image_tensor,
+            float32_vs_uint8=True,
             closeness_kwargs={
-                # TODO: This tolerance is noticeably larger than "regular" ones. Investigate if this is a bug in the
-                #  kernel for floating point inputs, given that uint8 works almost perfectly.
-                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=40 / 255, rtol=0),
-                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(atol=1, rtol=0),
+                **pil_reference_pixel_difference(),
+                **float32_vs_uint8_pixel_difference(),
             },
         ),
         KernelInfo(
@@ -2053,7 +2063,9 @@ def sample_inputs_adjust_hue_image_tensor():
 
 def reference_inputs_adjust_hue_image_tensor():
     for image_loader, hue_factor in itertools.product(
-        make_image_loaders(color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB), extra_dims=[()]),
+        make_image_loaders(
+            color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB), extra_dims=[()], dtypes=[torch.uint8]
+        ),
         _ADJUST_HUE_FACTORS,
     ):
         yield ArgsKwargs(image_loader, hue_factor=hue_factor)
@@ -2072,11 +2084,11 @@ def sample_inputs_adjust_hue_video():
             sample_inputs_fn=sample_inputs_adjust_hue_image_tensor,
             reference_fn=pil_reference_wrapper(F.adjust_hue_image_pil),
             reference_inputs_fn=reference_inputs_adjust_hue_image_tensor,
+            float32_vs_uint8=True,
             closeness_kwargs={
-                # TODO: These tolerances are noticeably larger than "regular" ones. Investigate if this is a bug in the
-                #  kernel.
-                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=20 / 255, rtol=0),
-                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(atol=20, rtol=0),
+                # TODO: investigate why the tolerance needs to be this large
+                **pil_reference_pixel_difference(20),
+                **float32_vs_uint8_pixel_difference(),
             },
         ),
         KernelInfo(
@@ -2098,7 +2110,9 @@ def sample_inputs_adjust_saturation_image_tensor():
 
 def reference_inputs_adjust_saturation_image_tensor():
     for image_loader, saturation_factor in itertools.product(
-        make_image_loaders(color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB), extra_dims=[()]),
+        make_image_loaders(
+            color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB), extra_dims=[()], dtypes=[torch.uint8]
+        ),
         _ADJUST_SATURATION_FACTORS,
     ):
         yield ArgsKwargs(image_loader, saturation_factor=saturation_factor)
@@ -2117,9 +2131,10 @@ def sample_inputs_adjust_saturation_video():
             sample_inputs_fn=sample_inputs_adjust_saturation_image_tensor,
             reference_fn=pil_reference_wrapper(F.adjust_saturation_image_pil),
             reference_inputs_fn=reference_inputs_adjust_saturation_image_tensor,
+            float32_vs_uint8=True,
             closeness_kwargs={
-                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=3 / 255, rtol=0),
-                (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(atol=1, rtol=0),
+                **pil_reference_pixel_difference(),
+                **float32_vs_uint8_pixel_difference(2),
             },
         ),
         KernelInfo(
@@ -2169,7 +2184,9 @@ def sample_inputs_five_crop_image_tensor():
 
 def reference_inputs_five_crop_image_tensor():
     for size in _FIVE_TEN_CROP_SIZES:
-        for image_loader in make_image_loaders(sizes=[_get_five_ten_crop_spatial_size(size)], extra_dims=[()]):
+        for image_loader in make_image_loaders(
+            sizes=[_get_five_ten_crop_spatial_size(size)], extra_dims=[()], dtypes=[torch.uint8]
+        ):
             yield ArgsKwargs(image_loader, size=size)
 
 
@@ -2191,7 +2208,9 @@ def sample_inputs_ten_crop_image_tensor():
 
 def reference_inputs_ten_crop_image_tensor():
     for size, vertical_flip in itertools.product(_FIVE_TEN_CROP_SIZES, [False, True]):
-        for image_loader in make_image_loaders(sizes=[_get_five_ten_crop_spatial_size(size)], extra_dims=[()]):
+        for image_loader in make_image_loaders(
+            sizes=[_get_five_ten_crop_spatial_size(size)], extra_dims=[()], dtypes=[torch.uint8]
+        ):
             yield ArgsKwargs(image_loader, size=size, vertical_flip=vertical_flip)
 
 
@@ -2225,9 +2244,6 @@ def wrapper(input_tensor, *other_args, **kwargs):
             reference_fn=multi_crop_pil_reference_wrapper(F.five_crop_image_pil),
             reference_inputs_fn=reference_inputs_five_crop_image_tensor,
             test_marks=_common_five_ten_crop_marks,
-            closeness_kwargs={
-                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=1 / 255, rtol=0),
-            },
         ),
         KernelInfo(
             F.five_crop_video,
@@ -2240,9 +2256,6 @@ def wrapper(input_tensor, *other_args, **kwargs):
             reference_fn=multi_crop_pil_reference_wrapper(F.ten_crop_image_pil),
             reference_inputs_fn=reference_inputs_ten_crop_image_tensor,
             test_marks=_common_five_ten_crop_marks,
-            closeness_kwargs={
-                (("TestKernels", "test_against_reference"), torch.float32, "cpu"): dict(atol=1 / 255, rtol=0),
-            },
         ),
         KernelInfo(
             F.ten_crop_video,
diff --git a/test/test_prototype_transforms_functional.py b/test/test_prototype_transforms_functional.py
index 13ed4f527e4..c45bddae56b 100644
--- a/test/test_prototype_transforms_functional.py
+++ b/test/test_prototype_transforms_functional.py
@@ -11,7 +11,7 @@
 
 import torch
 from common_utils import cache, cpu_and_gpu, needs_cuda, set_rng_seed
-from prototype_common_utils import assert_close, make_bounding_boxes, make_image
+from prototype_common_utils import assert_close, make_bounding_boxes, make_image, parametrized_error_message
 from prototype_transforms_dispatcher_infos import DISPATCHER_INFOS
 from prototype_transforms_kernel_infos import KERNEL_INFOS
 from torch.utils._pytree import tree_map
@@ -131,6 +131,7 @@ def test_scripted_vs_eager(self, test_id, info, args_kwargs, device):
             actual,
             expected,
             **info.get_closeness_kwargs(test_id, dtype=input.dtype, device=input.device),
+            msg=parametrized_error_message(*other_args, **kwargs),
         )
 
     def _unbatch(self, batch, *, data_dims):
@@ -187,6 +188,7 @@ def test_batched_vs_single(self, test_id, info, args_kwargs, device):
             actual,
             expected,
             **info.get_closeness_kwargs(test_id, dtype=batched_input.dtype, device=batched_input.device),
+            msg=parametrized_error_message(*other_args, **kwargs),
         )
 
     @sample_inputs
@@ -216,6 +218,7 @@ def test_cuda_vs_cpu(self, test_id, info, args_kwargs):
             output_cpu,
             check_device=False,
             **info.get_closeness_kwargs(test_id, dtype=input_cuda.dtype, device=input_cuda.device),
+            msg=parametrized_error_message(*other_args, **kwargs),
         )
 
     @sample_inputs
@@ -242,6 +245,34 @@ def test_against_reference(self, test_id, info, args_kwargs):
             actual,
             expected,
             **info.get_closeness_kwargs(test_id, dtype=input.dtype, device=input.device),
+            msg=parametrized_error_message(*other_args, **kwargs),
+        )
+
+    @make_info_args_kwargs_parametrization(
+        [info for info in KERNEL_INFOS if info.float32_vs_uint8],
+        args_kwargs_fn=lambda info: info.reference_inputs_fn(),
+    )
+    def test_float32_vs_uint8(self, test_id, info, args_kwargs):
+        (input, *other_args), kwargs = args_kwargs.load("cpu")
+        # Only uint8 inputs are compared against their float32 conversion; every other dtype is skipped.
+        if input.dtype != torch.uint8:
+            pytest.skip(f"Input dtype is {input.dtype}.")
+
+        adapted_other_args, adapted_kwargs = info.float32_vs_uint8(other_args, kwargs)
+        # actual: convert the uint8 input to float32 first, then run the kernel with the adapted parameters.
+        actual = info.kernel(
+            F.convert_dtype_image_tensor(input, dtype=torch.float32),
+            *adapted_other_args,
+            **adapted_kwargs,
+        )
+        # expected: run the kernel on the uint8 input with the original parameters, then convert to float32.
+        expected = F.convert_dtype_image_tensor(info.kernel(input, *other_args, **kwargs), dtype=torch.float32)
+
+        assert_close(
+            actual,
+            expected,
+            **info.get_closeness_kwargs(test_id, dtype=torch.float32, device=input.device),
+            msg=parametrized_error_message(*other_args, **kwargs),
         )
 
 
@@ -439,29 +470,6 @@ def test_convert_dtype_image_tensor_dtype_and_device(info, args_kwargs, device):
     assert output.device == input.device
 
 
-@pytest.mark.parametrize(
-    ("info", "args_kwargs"),
-    make_info_args_kwargs_params(
-        KERNEL_INFOS_MAP[F.posterize_image_tensor],
-        args_kwargs_fn=lambda info: info.reference_inputs_fn(),
-    ),
-)
-def test_posterize_image_tensor_float_vs_int(info, args_kwargs):
-    (input, *other_args), kwargs = args_kwargs.load("cpu")
-
-    actual = F.convert_dtype_image_tensor(
-        info.kernel(
-            F.convert_dtype_image_tensor(input, dtype=torch.float32),
-            *other_args,
-            **kwargs,
-        ),
-        dtype=input.dtype,
-    )
-    expected = info.kernel(input, *other_args, **kwargs)
-
-    assert_close(actual, expected, atol=1, rtol=0)
-
-
 # TODO: All correctness checks below this line should be ported to be references on a `KernelInfo` in
 #  `prototype_transforms_kernel_infos.py`
 

From 0977def624ae010571c1c8e12d065f6bd0fa1279 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Thu, 10 Nov 2022 16:49:29 +0100
Subject: [PATCH 4/6] increase tolerance for elastic_mask

---
 test/prototype_transforms_kernel_infos.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/prototype_transforms_kernel_infos.py b/test/prototype_transforms_kernel_infos.py
index dafbf9abc97..1dd724f58de 100644
--- a/test/prototype_transforms_kernel_infos.py
+++ b/test/prototype_transforms_kernel_infos.py
@@ -1443,7 +1443,7 @@ def sample_inputs_elastic_video():
             reference_inputs_fn=reference_inputs_elastic_mask,
             float32_vs_uint8=True,
             # TODO: investigate
-            closeness_kwargs=pil_reference_pixel_difference(65, agg_method="mean"),
+            closeness_kwargs=pil_reference_pixel_difference(80, agg_method="mean"),
         ),
         KernelInfo(
             F.elastic_video,

From 956db81f918333e2fb67e5fac8aae3ede2768191 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Thu, 10 Nov 2022 17:17:51 +0100
Subject: [PATCH 5/6] fix autocontrast tolerances

---
 test/prototype_common_utils.py            | 23 +++++++++++++----------
 test/prototype_transforms_kernel_infos.py |  5 ++++-
 2 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/test/prototype_common_utils.py b/test/prototype_common_utils.py
index 69a33aa3178..9a613901e6a 100644
--- a/test/prototype_common_utils.py
+++ b/test/prototype_common_utils.py
@@ -143,18 +143,21 @@ def to_str(obj):
         else:
             return repr(obj)
 
-    postfix = "\n".join(
-        [
-            "",
-            "Failure happened for the following parameters:",
-            "",
-            *[to_str(arg) for arg in args],
-            *[f"{name}={to_str(kwarg)}" for name, kwarg in kwargs.items()],
-        ]
-    )
+    if args or kwargs:
+        postfix = "\n".join(
+            [
+                "",
+                "Failure happened for the following parameters:",
+                "",
+                *[to_str(arg) for arg in args],
+                *[f"{name}={to_str(kwarg)}" for name, kwarg in kwargs.items()],
+            ]
+        )
+    else:
+        postfix = ""
 
     def wrapper(msg):
-        return f"{msg}\n{postfix}"
+        return msg + postfix
 
     return wrapper
 
diff --git a/test/prototype_transforms_kernel_infos.py b/test/prototype_transforms_kernel_infos.py
index 1dd724f58de..361a921b18e 100644
--- a/test/prototype_transforms_kernel_infos.py
+++ b/test/prototype_transforms_kernel_infos.py
@@ -1825,7 +1825,10 @@ def sample_inputs_autocontrast_video():
             reference_fn=pil_reference_wrapper(F.autocontrast_image_pil),
             reference_inputs_fn=reference_inputs_autocontrast_image_tensor,
             float32_vs_uint8=True,
-            closeness_kwargs=float32_vs_uint8_pixel_difference(),
+            closeness_kwargs={
+                **pil_reference_pixel_difference(),
+                **float32_vs_uint8_pixel_difference(),
+            },
         ),
         KernelInfo(
             F.autocontrast_video,

From b0eded358e3fbf37c8a52d326f741cf604374ee2 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Thu, 10 Nov 2022 17:49:19 +0100
Subject: [PATCH 6/6] increase tolerance for RandomAutocontrast

---
 test/test_prototype_transforms_consistency.py | 23 +++++++++++--------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/test/test_prototype_transforms_consistency.py b/test/test_prototype_transforms_consistency.py
index a9a283b8fd6..0cc52f8b838 100644
--- a/test/test_prototype_transforms_consistency.py
+++ b/test/test_prototype_transforms_consistency.py
@@ -244,16 +244,19 @@ def __init__(
             ArgsKwargs(p=1, threshold=0.99),
         ],
     ),
-    ConsistencyConfig(
-        prototype_transforms.RandomAutocontrast,
-        legacy_transforms.RandomAutocontrast,
-        [
-            ArgsKwargs(p=0),
-            ArgsKwargs(p=1),
-        ],
-        # Use default tolerances of `torch.testing.assert_close`
-        closeness_kwargs=dict(rtol=None, atol=None),
-    ),
+    *[
+        ConsistencyConfig(
+            prototype_transforms.RandomAutocontrast,
+            legacy_transforms.RandomAutocontrast,
+            [
+                ArgsKwargs(p=0),
+                ArgsKwargs(p=1),
+            ],
+            make_images_kwargs=dict(DEFAULT_MAKE_IMAGES_KWARGS, dtypes=[dt]),
+            closeness_kwargs=ckw,
+        )
+        for dt, ckw in [(torch.uint8, dict(atol=1, rtol=0)), (torch.float32, dict(rtol=None, atol=None))]
+    ],
     ConsistencyConfig(
         prototype_transforms.RandomAdjustSharpness,
         legacy_transforms.RandomAdjustSharpness,