diff --git a/test/prototype_common_utils.py b/test/prototype_common_utils.py index e9192f44f52..333e11fb227 100644 --- a/test/prototype_common_utils.py +++ b/test/prototype_common_utils.py @@ -28,6 +28,7 @@ "assert_close", "assert_equal", "ArgsKwargs", + "VALID_EXTRA_DIMS", "make_image_loaders", "make_image", "make_images", @@ -201,7 +202,10 @@ def _parse_image_size(size, *, name="size"): ) -DEFAULT_EXTRA_DIMS = ((), (0,), (4,), (2, 3), (5, 0), (0, 5)) +VALID_EXTRA_DIMS = ((), (4,), (2, 3)) +DEGENERATE_BATCH_DIMS = ((0,), (5, 0), (0, 5)) + +DEFAULT_EXTRA_DIMS = (*VALID_EXTRA_DIMS, *DEGENERATE_BATCH_DIMS) def from_loader(loader_fn): diff --git a/test/prototype_transforms_dispatcher_infos.py b/test/prototype_transforms_dispatcher_infos.py index 11a4c35ae18..9678249aa0b 100644 --- a/test/prototype_transforms_dispatcher_infos.py +++ b/test/prototype_transforms_dispatcher_infos.py @@ -63,17 +63,40 @@ def sample_inputs(self, *feature_types, filter_metadata=True): yield args_kwargs -def xfail_python_scalar_arg_jit(name, *, reason=None): +def xfail_jit_python_scalar_arg(name, *, reason=None): reason = reason or f"Python scalar int or float for `{name}` is not supported when scripting" return TestMark( ("TestDispatchers", "test_scripted_smoke"), pytest.mark.xfail(reason=reason), - condition=lambda args_kwargs: isinstance(args_kwargs.kwargs[name], (int, float)), + condition=lambda args_kwargs: isinstance(args_kwargs.kwargs.get(name), (int, float)), ) -def xfail_integer_size_jit(name="size"): - return xfail_python_scalar_arg_jit(name, reason=f"Integer `{name}` is not supported when scripting.") +def xfail_jit_integer_size(name="size"): + return xfail_jit_python_scalar_arg(name, reason=f"Integer `{name}` is not supported when scripting.") + + +def xfail_jit_tuple_instead_of_list(name, *, reason=None): + reason = reason or f"Passing a tuple instead of a list for `{name}` is not supported when scripting" + return TestMark( + ("TestDispatchers", "test_scripted_smoke"), + pytest.mark.xfail(reason=reason), + condition=lambda args_kwargs: isinstance(args_kwargs.kwargs.get(name), tuple), + ) + + +def is_list_of_ints(args_kwargs): + fill = args_kwargs.kwargs.get("fill") + return isinstance(fill, list) and any(isinstance(scalar_fill, int) for scalar_fill in fill) + + +def xfail_jit_list_of_ints(name, *, reason=None): + reason = reason or f"Passing a list of integers for `{name}` is not supported when scripting" + return TestMark( + ("TestDispatchers", "test_scripted_smoke"), + pytest.mark.xfail(reason=reason), + condition=is_list_of_ints, + ) skip_dispatch_feature = TestMark( @@ -123,7 +146,7 @@ def fill_sequence_needs_broadcast(args_kwargs): }, pil_kernel_info=PILKernelInfo(F.resize_image_pil), test_marks=[ - xfail_integer_size_jit(), + xfail_jit_integer_size(), ], ), DispatcherInfo( @@ -136,7 +159,10 @@ def fill_sequence_needs_broadcast(args_kwargs): pil_kernel_info=PILKernelInfo(F.affine_image_pil), test_marks=[ xfail_dispatch_pil_if_fill_sequence_needs_broadcast, - xfail_python_scalar_arg_jit("shear"), + xfail_jit_python_scalar_arg("shear"), + xfail_jit_tuple_instead_of_list("fill"), + # TODO: check if this is a regression since it seems that should be supported if `int` is ok + xfail_jit_list_of_ints("fill"), ], ), DispatcherInfo( @@ -156,6 +182,11 @@ def fill_sequence_needs_broadcast(args_kwargs): features.Mask: F.rotate_mask, }, pil_kernel_info=PILKernelInfo(F.rotate_image_pil), + test_marks=[ + xfail_jit_tuple_instead_of_list("fill"), + # TODO: check if this is a regression since it seems that should be supported if `int` is ok + xfail_jit_list_of_ints("fill"), + ], ), DispatcherInfo( F.crop, @@ -194,7 +225,12 @@ def fill_sequence_needs_broadcast(args_kwargs): ), condition=lambda args_kwargs: fill_sequence_needs_broadcast(args_kwargs) and args_kwargs.kwargs.get("padding_mode", "constant") == "constant", - ) + ), + xfail_jit_python_scalar_arg("padding"), + xfail_jit_tuple_instead_of_list("padding"), + xfail_jit_tuple_instead_of_list("fill"), + # TODO: check if this is a regression since it seems that should be supported if `int` is ok + xfail_jit_list_of_ints("fill"), ], ), DispatcherInfo( @@ -227,7 +263,7 @@ def fill_sequence_needs_broadcast(args_kwargs): }, pil_kernel_info=PILKernelInfo(F.center_crop_image_pil), test_marks=[ - xfail_integer_size_jit("output_size"), + xfail_jit_integer_size("output_size"), ], ), DispatcherInfo( @@ -237,8 +273,8 @@ def fill_sequence_needs_broadcast(args_kwargs): }, pil_kernel_info=PILKernelInfo(F.gaussian_blur_image_pil), test_marks=[ - xfail_python_scalar_arg_jit("kernel_size"), - xfail_python_scalar_arg_jit("sigma"), + xfail_jit_python_scalar_arg("kernel_size"), + xfail_jit_python_scalar_arg("sigma"), ], ), DispatcherInfo( @@ -335,7 +371,7 @@ def fill_sequence_needs_broadcast(args_kwargs): }, pil_kernel_info=PILKernelInfo(F.five_crop_image_pil), test_marks=[ - xfail_integer_size_jit(), + xfail_jit_integer_size(), skip_dispatch_feature, ], ), @@ -345,7 +381,7 @@ def fill_sequence_needs_broadcast(args_kwargs): features.Image: F.ten_crop_image_tensor, }, test_marks=[ - xfail_integer_size_jit(), + xfail_jit_integer_size(), skip_dispatch_feature, ], pil_kernel_info=PILKernelInfo(F.ten_crop_image_pil), diff --git a/test/prototype_transforms_kernel_infos.py b/test/prototype_transforms_kernel_infos.py index 2e02989b458..c0e7bf5bff4 100644 --- a/test/prototype_transforms_kernel_infos.py +++ b/test/prototype_transforms_kernel_infos.py @@ -12,8 +12,16 @@ import torchvision.prototype.transforms.functional as F from _pytest.mark.structures import MarkDecorator +from common_utils import cycle_over from datasets_utils import combinations_grid -from prototype_common_utils import ArgsKwargs, make_bounding_box_loaders, make_image_loaders, make_mask_loaders +from prototype_common_utils import ( + ArgsKwargs, + make_bounding_box_loaders, + make_image_loader, + make_image_loaders, + make_mask_loaders, + VALID_EXTRA_DIMS, +) from torchvision.prototype import features from torchvision.transforms.functional_tensor import _max_value as get_max_value @@ -98,17 +106,40 @@ def mark_framework_limitation(test_id, reason): return TestMark(test_id, pytest.mark.skip(reason=reason)) -def xfail_python_scalar_arg_jit(name, *, reason=None): +def xfail_jit_python_scalar_arg(name, *, reason=None): reason = reason or f"Python scalar int or float for `{name}` is not supported when scripting" return TestMark( ("TestKernels", "test_scripted_vs_eager"), pytest.mark.xfail(reason=reason), - condition=lambda args_kwargs: isinstance(args_kwargs.kwargs[name], (int, float)), + condition=lambda args_kwargs: isinstance(args_kwargs.kwargs.get(name), (int, float)), + ) + + +def xfail_jit_integer_size(name="size"): + return xfail_jit_python_scalar_arg(name, reason=f"Integer `{name}` is not supported when scripting.") + + +def xfail_jit_tuple_instead_of_list(name, *, reason=None): + reason = reason or f"Passing a tuple instead of a list for `{name}` is not supported when scripting" + return TestMark( + ("TestKernels", "test_scripted_vs_eager"), + pytest.mark.xfail(reason=reason), + condition=lambda args_kwargs: isinstance(args_kwargs.kwargs.get(name), tuple), ) -def xfail_integer_size_jit(name="size"): - return xfail_python_scalar_arg_jit(name, reason=f"Integer `{name}` is not supported when scripting.") +def is_list_of_ints(args_kwargs): + fill = args_kwargs.kwargs.get("fill") + return isinstance(fill, list) and any(isinstance(scalar_fill, int) for scalar_fill in fill) + + +def xfail_jit_list_of_ints(name, *, reason=None): + reason = reason or f"Passing a list of integers for `{name}` is not supported when scripting" + return TestMark( + ("TestKernels", "test_scripted_vs_eager"), + pytest.mark.xfail(reason=reason), + condition=is_list_of_ints, + ) KERNEL_INFOS = [] @@ -173,15 +204,33 @@ def _get_resize_sizes(image_size): def sample_inputs_resize_image_tensor(): + for image_loader in make_image_loaders( + sizes=["random"], color_spaces=[features.ColorSpace.RGB], dtypes=[torch.float32] + ): + for size in _get_resize_sizes(image_loader.image_size): + yield ArgsKwargs(image_loader, size=size) + for image_loader, interpolation in itertools.product( - make_image_loaders(dtypes=[torch.float32]), + make_image_loaders(sizes=["random"], color_spaces=[features.ColorSpace.RGB]), [ F.InterpolationMode.NEAREST, + F.InterpolationMode.BILINEAR, F.InterpolationMode.BICUBIC, ], ): - for size in _get_resize_sizes(image_loader.image_size): - yield ArgsKwargs(image_loader, size=size, interpolation=interpolation) + yield ArgsKwargs(image_loader, size=[min(image_loader.image_size) + 1], interpolation=interpolation) + + # We have a speed hack in place for nearest interpolation and single channel images (grayscale) + for image_loader in make_image_loaders( + sizes=["random"], + color_spaces=[features.ColorSpace.GRAY], + extra_dims=VALID_EXTRA_DIMS, + ): + yield ArgsKwargs( + image_loader, size=[min(image_loader.image_size) + 1], interpolation=F.InterpolationMode.NEAREST + ) + + yield ArgsKwargs(make_image_loader(size=(11, 17)), size=20, max_size=25) @pil_reference_wrapper @@ -217,15 +266,14 @@ def reference_inputs_resize_image_tensor(): def sample_inputs_resize_bounding_box(): - for bounding_box_loader in make_bounding_box_loaders(formats=[features.BoundingBoxFormat.XYXY]): + for bounding_box_loader in make_bounding_box_loaders(): for size in _get_resize_sizes(bounding_box_loader.image_size): yield ArgsKwargs(bounding_box_loader, size=size, image_size=bounding_box_loader.image_size) def sample_inputs_resize_mask(): - for mask_loader in make_mask_loaders(dtypes=[torch.uint8]): - for size in _get_resize_sizes(mask_loader.shape[-2:]): - yield ArgsKwargs(mask_loader, size=size) + for mask_loader in make_mask_loaders(sizes=["random"], num_categories=["random"], num_objects=["random"]): + yield ArgsKwargs(mask_loader, size=[min(mask_loader.shape[-2:]) + 1]) @pil_reference_wrapper @@ -248,14 +296,14 @@ def reference_inputs_resize_mask(): reference_inputs_fn=reference_inputs_resize_image_tensor, closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, test_marks=[ - xfail_integer_size_jit(), + xfail_jit_integer_size(), ], ), KernelInfo( F.resize_bounding_box, sample_inputs_fn=sample_inputs_resize_bounding_box, test_marks=[ - xfail_integer_size_jit(), + xfail_jit_integer_size(), ], ), KernelInfo( @@ -265,7 +313,7 @@ def reference_inputs_resize_mask(): reference_inputs_fn=reference_inputs_resize_mask, closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, test_marks=[ - xfail_integer_size_jit(), + xfail_jit_integer_size(), ], ), ] @@ -290,28 +338,51 @@ def _diversify_affine_kwargs_types(affine_kwargs): yield dict(affine_kwargs, shear=diverse_shear) +def _full_affine_params(**partial_params): + partial_params.setdefault("angle", 0.0) + partial_params.setdefault("translate", [0.0, 0.0]) + partial_params.setdefault("scale", 1.0) + partial_params.setdefault("shear", [0.0, 0.0]) + partial_params.setdefault("center", None) + return partial_params + + +_DIVERSE_AFFINE_PARAMS = [ + _full_affine_params(**{name: arg}) + for name, args in [ + ("angle", [1.0, 2]), + ("translate", [[1.0, 0.5], [1, 2], (1.0, 0.5), (1, 2)]), + ("scale", [0.5]), + ("shear", [1.0, 2, [1.0], [2], (1.0,), (2,), [1.0, 0.5], [1, 2], (1.0, 0.5), (1, 2)]), + ("center", [None, [1.0, 0.5], [1, 2], (1.0, 0.5), (1, 2)]), + ] + for arg in args +] + + def sample_inputs_affine_image_tensor(): - for image_loader, interpolation_mode, center in itertools.product( - make_image_loaders(sizes=["random"], dtypes=[torch.float32]), + make_affine_image_loaders = functools.partial( + make_image_loaders, sizes=["random"], color_spaces=[features.ColorSpace.RGB], dtypes=[torch.float32] + ) + + for image_loader, affine_params in itertools.product(make_affine_image_loaders(), _DIVERSE_AFFINE_PARAMS): + yield ArgsKwargs(image_loader, **affine_params) + + for image_loader in make_affine_image_loaders(): + fills = [None, 0.5] + if image_loader.num_channels > 1: + fills.extend(vector_fill * image_loader.num_channels for vector_fill in [(0.5,), (1,), [0.5], [1]]) + for fill in fills: + yield ArgsKwargs(image_loader, **_full_affine_params(), fill=fill) + + for image_loader, interpolation in itertools.product( + make_affine_image_loaders(), [ F.InterpolationMode.NEAREST, F.InterpolationMode.BILINEAR, ], - [None, (0, 0)], - ): - for fill in [None, 128.0, 128, [12.0], [0.5] * image_loader.num_channels]: - yield ArgsKwargs( - image_loader, - interpolation=interpolation_mode, - center=center, - fill=fill, - **_AFFINE_KWARGS[0], - ) - - for image_loader, affine_kwargs in itertools.product( - make_image_loaders(sizes=["random"], dtypes=[torch.float32]), _diversify_affine_kwargs_types(_AFFINE_KWARGS[0]) ): - yield ArgsKwargs(image_loader, **affine_kwargs) + yield ArgsKwargs(image_loader, **_full_affine_params(), fill=0) def reference_inputs_affine_image_tensor(): @@ -324,22 +395,14 @@ def reference_inputs_affine_image_tensor(): def sample_inputs_affine_bounding_box(): - for bounding_box_loader in make_bounding_box_loaders(): - yield ArgsKwargs( - bounding_box_loader, - format=bounding_box_loader.format, - image_size=bounding_box_loader.image_size, - **_AFFINE_KWARGS[0], - ) - - for bounding_box_loader, affine_kwargs in itertools.product( - make_bounding_box_loaders(), _diversify_affine_kwargs_types(_AFFINE_KWARGS[0]) + for bounding_box_loader, affine_params in itertools.product( + make_bounding_box_loaders(formats=[features.BoundingBoxFormat.XYXY]), _DIVERSE_AFFINE_PARAMS ): yield ArgsKwargs( bounding_box_loader, format=bounding_box_loader.format, image_size=bounding_box_loader.image_size, - **affine_kwargs, + **affine_params, ) @@ -423,16 +486,8 @@ def reference_inputs_affine_bounding_box(): def sample_inputs_affine_image_mask(): - for mask_loader, center in itertools.product( - make_mask_loaders(sizes=["random"], dtypes=[torch.uint8]), - [None, (0, 0)], - ): - yield ArgsKwargs(mask_loader, center=center, **_AFFINE_KWARGS[0]) - - for mask_loader, affine_kwargs in itertools.product( - make_mask_loaders(sizes=["random"], dtypes=[torch.uint8]), _diversify_affine_kwargs_types(_AFFINE_KWARGS[0]) - ): - yield ArgsKwargs(mask_loader, **affine_kwargs) + for mask_loader in make_mask_loaders(sizes=["random"], num_categories=["random"], num_objects=["random"]): + yield ArgsKwargs(mask_loader, **_full_affine_params()) @pil_reference_wrapper @@ -455,7 +510,12 @@ def reference_inputs_resize_mask(): reference_fn=pil_reference_wrapper(F.affine_image_pil), reference_inputs_fn=reference_inputs_affine_image_tensor, closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, - test_marks=[xfail_python_scalar_arg_jit("shear")], + test_marks=[ + xfail_jit_python_scalar_arg("shear"), + xfail_jit_tuple_instead_of_list("fill"), + # TODO: check if this is a regression since it seems that should be supported if `int` is ok + xfail_jit_list_of_ints("fill"), + ], ), KernelInfo( F.affine_bounding_box, @@ -464,7 +524,7 @@ def reference_inputs_resize_mask(): reference_inputs_fn=reference_inputs_affine_bounding_box, closeness_kwargs=dict(atol=1, rtol=0), test_marks=[ - xfail_python_scalar_arg_jit("shear"), + xfail_jit_python_scalar_arg("shear"), ], ), KernelInfo( @@ -473,7 +533,9 @@ def reference_inputs_resize_mask(): reference_fn=reference_affine_mask, reference_inputs_fn=reference_inputs_resize_mask, closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, - test_marks=[xfail_python_scalar_arg_jit("shear")], + test_marks=[ + xfail_jit_python_scalar_arg("shear"), + ], ), ] ) @@ -514,15 +576,21 @@ def reference_inputs_convert_format_bounding_box(): def sample_inputs_convert_color_space_image_tensor(): - color_spaces = set(features.ColorSpace) - {features.ColorSpace.OTHER} - for image_loader in make_image_loaders(sizes=["random"], color_spaces=color_spaces, constant_alpha=True): - old_color_space = image_loader.color_space - for params in combinations_grid(new_color_space=color_spaces - {old_color_space}, copy=(True, False)): - yield ArgsKwargs(image_loader, old_color_space=old_color_space, **params) + color_spaces = list(set(features.ColorSpace) - {features.ColorSpace.OTHER}) + + for old_color_space, new_color_space in cycle_over(color_spaces): + for image_loader in make_image_loaders(sizes=["random"], color_spaces=[old_color_space], constant_alpha=True): + yield ArgsKwargs(image_loader, old_color_space=old_color_space, new_color_space=new_color_space) + + for color_space in color_spaces: + for image_loader in make_image_loaders( + sizes=["random"], color_spaces=[color_space], dtypes=[torch.float32], constant_alpha=True + ): + yield ArgsKwargs(image_loader, old_color_space=color_space, new_color_space=color_space, copy=False) @pil_reference_wrapper -def reference_convert_color_space_image_tensor(image_pil, old_color_space, new_color_space, copy): +def reference_convert_color_space_image_tensor(image_pil, old_color_space, new_color_space, copy=True): color_space_pil = features.ColorSpace.from_pil_mode(image_pil.mode) if color_space_pil != old_color_space: raise pytest.UsageError( @@ -600,25 +668,30 @@ def sample_inputs_vertical_flip_mask(): def sample_inputs_rotate_image_tensor(): - for image_loader, params in itertools.product( - make_image_loaders(sizes=["random"], dtypes=[torch.float32]), - combinations_grid( - interpolation=[F.InterpolationMode.NEAREST, F.InterpolationMode.BILINEAR], - expand=[True, False], - center=[None, (0, 0)], - ), + make_rotate_image_loaders = functools.partial( + make_image_loaders, sizes=["random"], color_spaces=[features.ColorSpace.RGB], dtypes=[torch.float32] + ) + + for image_loader in make_rotate_image_loaders(): + yield ArgsKwargs(image_loader, angle=15.0, expand=True) + + for image_loader, center in itertools.product( + make_rotate_image_loaders(), [None, [1.0, 0.5], [1, 2], (1.0, 0.5), (1, 2)] ): - if params["center"] is not None and params["expand"]: - # Otherwise this will emit a warning and ignore center anyway - continue + yield ArgsKwargs(image_loader, angle=15.0, center=center) - for fill in [None, 0.5, [0.5] * image_loader.num_channels]: - yield ArgsKwargs( - image_loader, - angle=_ROTATE_ANGLES[0], - fill=fill, - **params, - ) + for image_loader in make_rotate_image_loaders(): + fills = [None, 0.5] + if image_loader.num_channels > 1: + fills.extend(vector_fill * image_loader.num_channels for vector_fill in [(0.5,), (1,), [0.5], [1]]) + for fill in fills: + yield ArgsKwargs(image_loader, angle=15.0, fill=fill) + + for image_loader, interpolation in itertools.product( + make_rotate_image_loaders(), + [F.InterpolationMode.NEAREST, F.InterpolationMode.BILINEAR], + ): + yield ArgsKwargs(image_loader, angle=15.0, fill=0) def reference_inputs_rotate_image_tensor(): @@ -637,22 +710,8 @@ def sample_inputs_rotate_bounding_box(): def sample_inputs_rotate_mask(): - for image_loader, params in itertools.product( - make_image_loaders(sizes=["random"], dtypes=[torch.uint8]), - combinations_grid( - expand=[True, False], - center=[None, (0, 0)], - ), - ): - if params["center"] is not None and params["expand"]: - # Otherwise this will emit a warning and ignore center anyway - continue - - yield ArgsKwargs( - image_loader, - angle=_ROTATE_ANGLES[0], - **params, - ) + for mask_loader in make_mask_loaders(sizes=["random"], num_categories=["random"], num_objects=["random"]): + yield ArgsKwargs(mask_loader, angle=15.0) @pil_reference_wrapper @@ -673,6 +732,11 @@ def reference_inputs_rotate_mask(): reference_fn=pil_reference_wrapper(F.rotate_image_pil), reference_inputs_fn=reference_inputs_rotate_image_tensor, closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, + test_marks=[ + xfail_jit_tuple_instead_of_list("fill"), + # TODO: check if this is a regression since it seems that should be supported if `int` is ok + xfail_jit_list_of_ints("fill"), + ], ), KernelInfo( F.rotate_bounding_box, @@ -692,7 +756,16 @@ def reference_inputs_rotate_mask(): def sample_inputs_crop_image_tensor(): - for image_loader, params in itertools.product(make_image_loaders(), [_CROP_PARAMS[0], _CROP_PARAMS[-1]]): + for image_loader, params in itertools.product( + make_image_loaders(sizes=[(16, 17)], color_spaces=[features.ColorSpace.RGB], dtypes=[torch.float32]), + [ + dict(top=4, left=3, height=7, width=8), + dict(top=-1, left=3, height=7, width=8), + dict(top=4, left=-1, height=7, width=8), + dict(top=4, left=3, height=17, width=8), + dict(top=4, left=3, height=7, width=18), + ], + ): yield ArgsKwargs(image_loader, **params) @@ -709,8 +782,8 @@ def sample_inputs_crop_bounding_box(): def sample_inputs_crop_mask(): - for mask_loader, params in itertools.product(make_mask_loaders(), [_CROP_PARAMS[0], _CROP_PARAMS[-1]]): - yield ArgsKwargs(mask_loader, **params) + for mask_loader in make_mask_loaders(sizes=[(16, 17)], num_categories=["random"], num_objects=["random"]): + yield ArgsKwargs(mask_loader, top=4, left=3, height=7, width=8) def reference_inputs_crop_mask(): @@ -829,12 +902,34 @@ def reference_inputs_resized_crop_mask(): def sample_inputs_pad_image_tensor(): - for image_loader, params in itertools.product(make_image_loaders(sizes=["random"]), _PAD_PARAMS): - fills = [None, 128.0, 128, [12.0]] - if params["padding_mode"] == "constant": - fills.append([12.0 + c for c in range(image_loader.num_channels)]) + make_pad_image_loaders = functools.partial( + make_image_loaders, sizes=["random"], color_spaces=[features.ColorSpace.RGB], dtypes=[torch.float32] + ) + + for image_loader, padding in itertools.product( + make_pad_image_loaders(), + [1, (1,), (1, 2), (1, 2, 3, 4), [1], [1, 2], [1, 2, 3, 4]], + ): + yield ArgsKwargs(image_loader, padding=padding) + + for image_loader in make_pad_image_loaders(): + fills = [None, 0.5] + if image_loader.num_channels > 1: + fills.extend(vector_fill * image_loader.num_channels for vector_fill in [(0.5,), (1,), [0.5], [1]]) for fill in fills: - yield ArgsKwargs(image_loader, fill=fill, **params) + yield ArgsKwargs(image_loader, padding=[1], fill=fill) + + for image_loader, padding_mode in itertools.product( + # We branch for non-constant padding and integer inputs + make_pad_image_loaders(dtypes=[torch.uint8]), + ["constant", "symmetric", "edge", "reflect"], + ): + yield ArgsKwargs(image_loader, padding=[1], padding_mode=padding_mode) + + # `torch.nn.functional.pad` does not support symmetric padding, and thus we have a custom implementation. Besides + # negative padding, this is already handled by the inputs above. + for image_loader in make_pad_image_loaders(): + yield ArgsKwargs(image_loader, padding=[-1], padding_mode="symmetric") def reference_inputs_pad_image_tensor(): @@ -848,18 +943,21 @@ def reference_inputs_pad_image_tensor(): def sample_inputs_pad_bounding_box(): - for bounding_box_loader, params in itertools.product(make_bounding_box_loaders(), _PAD_PARAMS): - if params["padding_mode"] != "constant": - continue - + for bounding_box_loader, padding in itertools.product( + make_bounding_box_loaders(), [1, (1,), (1, 2), (1, 2, 3, 4), [1], [1, 2], [1, 2, 3, 4]] + ): yield ArgsKwargs( - bounding_box_loader, format=bounding_box_loader.format, image_size=bounding_box_loader.image_size, **params + bounding_box_loader, + format=bounding_box_loader.format, + image_size=bounding_box_loader.image_size, + padding=padding, + padding_mode="constant", ) def sample_inputs_pad_mask(): - for image_loader, fill, params in itertools.product(make_mask_loaders(sizes=["random"]), [None, 127], _PAD_PARAMS): - yield ArgsKwargs(image_loader, fill=fill, **params) + for mask_loader in make_mask_loaders(sizes=["random"], num_categories=["random"], num_objects=["random"]): + yield ArgsKwargs(mask_loader, padding=[1]) def reference_inputs_pad_mask(): @@ -875,10 +973,21 @@ def reference_inputs_pad_mask(): reference_fn=pil_reference_wrapper(F.pad_image_pil), reference_inputs_fn=reference_inputs_pad_image_tensor, closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, + test_marks=[ + xfail_jit_python_scalar_arg("padding"), + xfail_jit_tuple_instead_of_list("padding"), + xfail_jit_tuple_instead_of_list("fill"), + # TODO: check if this is a regression since it seems that should be supported if `int` is ok + xfail_jit_list_of_ints("fill"), + ], ), KernelInfo( F.pad_bounding_box, sample_inputs_fn=sample_inputs_pad_bounding_box, + test_marks=[ + xfail_jit_python_scalar_arg("padding"), + xfail_jit_tuple_instead_of_list("padding"), + ], ), KernelInfo( F.pad_mask, @@ -1045,7 +1154,13 @@ def reference_inputs_elastic_mask(): def sample_inputs_center_crop_image_tensor(): for image_loader, output_size in itertools.product( - make_image_loaders(sizes=_CENTER_CROP_IMAGE_SIZES), _CENTER_CROP_OUTPUT_SIZES + make_image_loaders(sizes=[(16, 17)], color_spaces=[features.ColorSpace.RGB], dtypes=[torch.float32]), + [ + # valid `output_size` types for which cropping is applied to both dimensions + *[5, (4,), (2, 3), [6], [3, 2]], + # `output_size`'s for which at least one dimension needs to be padded + *[[4, 18], [17, 5], [17, 18]], + ], ): yield ArgsKwargs(image_loader, output_size=output_size) @@ -1068,10 +1183,9 @@ def sample_inputs_center_crop_bounding_box(): def sample_inputs_center_crop_mask(): - for mask_loader, output_size in itertools.product( - make_mask_loaders(sizes=_CENTER_CROP_IMAGE_SIZES), _CENTER_CROP_OUTPUT_SIZES - ): - yield ArgsKwargs(mask_loader, output_size=output_size) + for mask_loader in make_mask_loaders(sizes=["random"], num_categories=["random"], num_objects=["random"]): + height, width = mask_loader.shape[-2:] + yield ArgsKwargs(mask_loader, output_size=(height // 2, width // 2)) def reference_inputs_center_crop_mask(): @@ -1090,14 +1204,14 @@ def reference_inputs_center_crop_mask(): reference_inputs_fn=reference_inputs_center_crop_image_tensor, closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, test_marks=[ - xfail_integer_size_jit("output_size"), + xfail_jit_integer_size("output_size"), ], ), KernelInfo( F.center_crop_bounding_box, sample_inputs_fn=sample_inputs_center_crop_bounding_box, test_marks=[ - xfail_integer_size_jit("output_size"), + xfail_jit_integer_size("output_size"), ], ), KernelInfo( @@ -1107,7 +1221,7 @@ def reference_inputs_center_crop_mask(): reference_inputs_fn=reference_inputs_center_crop_mask, closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, test_marks=[ - xfail_integer_size_jit("output_size"), + xfail_jit_integer_size("output_size"), ], ), ] @@ -1115,18 +1229,21 @@ def reference_inputs_center_crop_mask(): def sample_inputs_gaussian_blur_image_tensor(): - for image_loader, params in itertools.product( - make_image_loaders( - sizes=["random"], - # FIXME: kernel should support arbitrary batch sizes - extra_dims=[(), (4,)], - ), - combinations_grid( - kernel_size=[(3, 3), [3, 3], 5], - sigma=[None, (3.0, 3.0), [2.0, 2.0], 4.0, [1.5], (3.14,)], - ), + make_gaussian_blur_image_loaders = functools.partial( + make_image_loaders, + sizes=["random"], + color_spaces=[features.ColorSpace.RGB], + # FIXME: kernel should support arbitrary batch sizes + extra_dims=[(), (4,)], + ) + + for image_loader, kernel_size in itertools.product(make_gaussian_blur_image_loaders(), [5, (3, 3), [3, 3]]): + yield ArgsKwargs(image_loader, kernel_size=kernel_size) + + for image_loader, sigma in itertools.product( + make_gaussian_blur_image_loaders(), [None, (3.0, 3.0), [2.0, 2.0], 4.0, [1.5], (3.14,)] ): - yield ArgsKwargs(image_loader, **params) + yield ArgsKwargs(image_loader, kernel_size=5, sigma=sigma) KERNEL_INFOS.append( @@ -1135,8 +1252,8 @@ def sample_inputs_gaussian_blur_image_tensor(): sample_inputs_fn=sample_inputs_gaussian_blur_image_tensor, closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, test_marks=[ - xfail_python_scalar_arg_jit("kernel_size"), - xfail_python_scalar_arg_jit("sigma"), + xfail_jit_python_scalar_arg("kernel_size"), + xfail_jit_python_scalar_arg("sigma"), ], ) ) @@ -1518,7 +1635,9 @@ def _get_five_ten_crop_image_size(size): def sample_inputs_five_crop_image_tensor(): for size in _FIVE_TEN_CROP_SIZES: - for image_loader in make_image_loaders(sizes=[_get_five_ten_crop_image_size(size)]): + for image_loader in make_image_loaders( + sizes=[_get_five_ten_crop_image_size(size)], color_spaces=[features.ColorSpace.RGB], dtypes=[torch.float32] + ): yield ArgsKwargs(image_loader, size=size) @@ -1530,7 +1649,9 @@ def reference_inputs_five_crop_image_tensor(): def sample_inputs_ten_crop_image_tensor(): for size, vertical_flip in itertools.product(_FIVE_TEN_CROP_SIZES, [False, True]): - for image_loader in make_image_loaders(sizes=[_get_five_ten_crop_image_size(size)]): + for image_loader in make_image_loaders( + sizes=[_get_five_ten_crop_image_size(size)], color_spaces=[features.ColorSpace.RGB], dtypes=[torch.float32] + ): yield ArgsKwargs(image_loader, size=size, vertical_flip=vertical_flip) @@ -1548,7 +1669,7 @@ def reference_inputs_ten_crop_image_tensor(): reference_fn=pil_reference_wrapper(F.five_crop_image_pil), reference_inputs_fn=reference_inputs_five_crop_image_tensor, test_marks=[ - xfail_integer_size_jit(), + xfail_jit_integer_size(), mark_framework_limitation(("TestKernels", "test_batched_vs_single"), "Custom batching needed."), ], closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, @@ -1559,7 +1680,7 @@ def reference_inputs_ten_crop_image_tensor(): reference_fn=pil_reference_wrapper(F.ten_crop_image_pil), reference_inputs_fn=reference_inputs_ten_crop_image_tensor, test_marks=[ - xfail_integer_size_jit(), + xfail_jit_integer_size(), mark_framework_limitation(("TestKernels", "test_batched_vs_single"), "Custom batching needed."), ], closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, diff --git a/torchvision/transforms/functional_tensor.py b/torchvision/transforms/functional_tensor.py index 2be2964b9e0..20b76fbf079 100644 --- a/torchvision/transforms/functional_tensor.py +++ b/torchvision/transforms/functional_tensor.py @@ -755,12 +755,7 @@ def gaussian_blur(img: Tensor, kernel_size: List[int], sigma: List[float]) -> Te kernel = _get_gaussian_kernel2d(kernel_size, sigma, dtype=dtype, device=img.device) kernel = kernel.expand(img.shape[-3], 1, kernel.shape[0], kernel.shape[1]) - img, need_cast, need_squeeze, out_dtype = _cast_squeeze_in( - img, - [ - kernel.dtype, - ], - ) + img, need_cast, need_squeeze, out_dtype = _cast_squeeze_in(img, [kernel.dtype]) # padding = (left, right, top, bottom) padding = [kernel_size[0] // 2, kernel_size[0] // 2, kernel_size[1] // 2, kernel_size[1] // 2]