From d51eb7c0eb876d161fab58905999288983b4c29c Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Fri, 4 Mar 2022 15:02:56 +0100 Subject: [PATCH 1/7] port RandomZoomOut from detection references to prototype transforms --- .../prototype/features/_bounding_box.py | 2 +- torchvision/prototype/transforms/__init__.py | 2 +- torchvision/prototype/transforms/_geometry.py | 61 ++++++++++++- .../transforms/functional/__init__.py | 4 + .../transforms/functional/_geometry.py | 86 ++++++++++++++++++- .../prototype/transforms/functional/_meta.py | 21 +++-- 6 files changed, 166 insertions(+), 10 deletions(-) diff --git a/torchvision/prototype/features/_bounding_box.py b/torchvision/prototype/features/_bounding_box.py index fbf66b80efe..cd5cdc69836 100644 --- a/torchvision/prototype/features/_bounding_box.py +++ b/torchvision/prototype/features/_bounding_box.py @@ -64,7 +64,7 @@ def to_format(self, format: Union[str, BoundingBoxFormat]) -> BoundingBox: from torchvision.prototype.transforms.functional import convert_bounding_box_format if isinstance(format, str): - format = BoundingBoxFormat[format] + format = BoundingBoxFormat.from_str(format.upper()) return BoundingBox.new_like( self, convert_bounding_box_format(self, old_format=self.format, new_format=format), format=format diff --git a/torchvision/prototype/transforms/__init__.py b/torchvision/prototype/transforms/__init__.py index 98ad7ae0d74..ce1de6a293a 100644 --- a/torchvision/prototype/transforms/__init__.py +++ b/torchvision/prototype/transforms/__init__.py @@ -7,7 +7,7 @@ from ._augment import RandomErasing, RandomMixup, RandomCutmix from ._auto_augment import RandAugment, TrivialAugmentWide, AutoAugment, AugMix from ._container import Compose, RandomApply, RandomChoice, RandomOrder -from ._geometry import HorizontalFlip, Resize, CenterCrop, RandomResizedCrop +from ._geometry import HorizontalFlip, Resize, CenterCrop, RandomResizedCrop, RandomZoomOut from ._meta import ConvertBoundingBoxFormat, ConvertImageDtype, ConvertImageColorSpace from ._misc import Identity, Normalize, ToDtype, Lambda from ._presets import CocoEval, ImageNetEval, VocEval, Kinect400Eval, RaftEval diff --git a/torchvision/prototype/transforms/_geometry.py b/torchvision/prototype/transforms/_geometry.py index 6f4f7a6cb4d..b478e82d821 100644 --- a/torchvision/prototype/transforms/_geometry.py +++ b/torchvision/prototype/transforms/_geometry.py @@ -1,6 +1,6 @@ import math import warnings -from typing import Any, Dict, List, Union, Sequence, Tuple, cast +from typing import Any, Dict, List, Union, Sequence, Tuple, cast, Optional import PIL.Image import torch @@ -168,3 +168,62 @@ def forward(self, *inputs: Any) -> Any: if has_any(sample, features.BoundingBox, features.SegmentationMask): raise TypeError(f"BoundingBox'es and SegmentationMask's are not supported by {type(self).__name__}()") return super().forward(sample) + + +class RandomZoomOut(Transform): + def __init__( + self, fill: Optional[List[float]] = None, side_range: Tuple[float, float] = (1.0, 4.0), p: float = 0.5 + ) -> None: + super().__init__() + + if fill is None: + fill = [0.0, 0.0, 0.0] + self.fill = fill + + self.side_range = side_range + if side_range[0] < 1.0 or side_range[0] > side_range[1]: + raise ValueError(f"Invalid canvas side range provided {side_range}.") + + self.p = p + + def _get_params(self, sample: Any) -> Dict[str, Any]: + image = query_image(sample) + _, orig_h, orig_w = get_image_dimensions(image) + + r = self.side_range[0] + torch.rand(1) * (self.side_range[1] - self.side_range[0]) + canvas_width 
= int(orig_w * r) + canvas_height = int(orig_h * r) + + r = torch.rand(2) + left = int((canvas_width - orig_w) * r[0]) + top = int((canvas_height - orig_h) * r[1]) + right = canvas_width - (left + orig_w) + bottom = canvas_height - (top + orig_h) + + return dict(left=left, top=top, right=right, bottom=bottom) + + def _transform(self, input: Any, params: Dict[str, Any]) -> Any: + if isinstance(input, features.Image): + output = F.zoom_out_image_tensor(input, **params, fill=self.fill) + return features.Image.new_like(input, output) + elif isinstance(input, torch.Tensor) and not isinstance(input, features._Feature): + return F.zoom_out_image_tensor(input, **params, fill=self.fill) + elif isinstance(input, PIL.Image.Image): + return F.zoom_out_image_pil(input, **params, fill=self.fill) + elif isinstance(input, features.BoundingBox): + output = F.zoom_out_bounding_box(input, **params, format=input.format) + + height, width = input.image_size + height += params["top"] + params["bottom"] + width += params["left"] + params["right"] + + return features.BoundingBox.new_like(input, output, image_size=(height, width)) + else: + return input + + def forward(self, *inputs: Any) -> Any: + sample = inputs if len(inputs) > 1 else inputs[0] + if torch.rand(1) >= self.p: + return sample + + return super().forward(sample) diff --git a/torchvision/prototype/transforms/functional/__init__.py b/torchvision/prototype/transforms/functional/__init__.py index e3fe60a7919..f91b61fec2c 100644 --- a/torchvision/prototype/transforms/functional/__init__.py +++ b/torchvision/prototype/transforms/functional/__init__.py @@ -54,12 +54,16 @@ rotate_image_pil, pad_image_tensor, pad_image_pil, + pad_bounding_box, crop_image_tensor, crop_image_pil, perspective_image_tensor, perspective_image_pil, vertical_flip_image_tensor, vertical_flip_image_pil, + zoom_out_image_pil, + zoom_out_image_tensor, + zoom_out_bounding_box, ) from ._misc import normalize_image_tensor, gaussian_blur_image_tensor from ._type_conversion import decode_image_with_pil, decode_video_with_av, label_to_one_hot diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py index 080fe5da891..337753431a5 100644 --- a/torchvision/prototype/transforms/functional/_geometry.py +++ b/torchvision/prototype/transforms/functional/_geometry.py @@ -21,7 +21,7 @@ def horizontal_flip_bounding_box( shape = bounding_box.shape bounding_box = convert_bounding_box_format( - bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY + bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY, copy=True ).view(-1, 4) bounding_box[:, [0, 2]] = image_size[1] - bounding_box[:, [2, 0]] @@ -210,6 +210,47 @@ def rotate_image_pil( pad_image_tensor = _FT.pad pad_image_pil = _FP.pad + +# TODO: this was copy-pasted from _FT.pad. 
Use this if _FT.pad is actually defined here +def _parse_pad_padding(padding: List[int]) -> List[int]: + if isinstance(padding, int): + if torch.jit.is_scripting(): + # This maybe unreachable + raise ValueError("padding can't be an int while torchscripting, set it as a list [value, ]") + pad_left = pad_right = pad_top = pad_bottom = padding + elif len(padding) == 1: + pad_left = pad_right = pad_top = pad_bottom = padding[0] + elif len(padding) == 2: + pad_left = pad_right = padding[0] + pad_top = pad_bottom = padding[1] + else: + pad_left = padding[0] + pad_top = padding[1] + pad_right = padding[2] + pad_bottom = padding[3] + + return [pad_left, pad_right, pad_top, pad_bottom] + + +def pad_bounding_box( + bounding_box: torch.Tensor, padding: List[int], format: features.BoundingBoxFormat +) -> torch.Tensor: + left, _, top, _ = _parse_pad_padding(padding) + + shape = bounding_box.shape + + bounding_box = convert_bounding_box_format( + bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY, copy=True + ).view(-1, 4) + + bounding_box[:, 0::2] += left + bounding_box[:, 1::2] += top + + return convert_bounding_box_format( + bounding_box, old_format=features.BoundingBoxFormat.XYXY, new_format=format + ).view(shape) + + crop_image_tensor = _FT.crop crop_image_pil = _FP.crop @@ -314,3 +355,46 @@ def resized_crop_image_pil( ) -> PIL.Image.Image: img = crop_image_pil(img, top, left, height, width) return resize_image_pil(img, size, interpolation=interpolation) + + +def zoom_out_image_tensor( + image: torch.Tensor, + left: int, + top: int, + right: int, + bottom: int, + fill: List[float] = (0.0,), # type: ignore[assignment] +) -> torch.Tensor: + num_channels, height, width = get_dimensions_image_tensor(image) + + # PyTorch's pad supports only integers on fill. 
So we need to overwrite the colour + output = pad_image_tensor(image, [left, top, right, bottom], fill=0, padding_mode="constant") + + if not isinstance(fill, (list, tuple)): + fill = [fill] * num_channels + fill = torch.tensor(fill).to().view(-1, 1, 1) + + output[..., :top, :] = fill + output[..., :, :left] = fill + output[..., (top + height) :, :] = fill + output[..., :, (left + width) :] = fill + + return output + + +def zoom_out_image_pil( + img: PIL.Image.Image, + left: int, + top: int, + right: int, + bottom: int, + fill: Optional[Union[float, List[float], Tuple[float, ...]]] = 0, +) -> PIL.Image.Image: + fill = tuple(int(v) for v in _FP._parse_fill(fill, img, name="fill")["fill"]) + return pad_image_pil(img, [left, top, right, bottom], fill=fill, padding_mode="constant") + + +def zoom_out_bounding_box( + bounding_box: torch.Tensor, left: int, top: int, right: int, bottom: int, format: features.BoundingBoxFormat +) -> torch.Tensor: + return pad_bounding_box(bounding_box, [left, top, right, bottom], format=format) diff --git a/torchvision/prototype/transforms/functional/_meta.py b/torchvision/prototype/transforms/functional/_meta.py index 5062c266959..4309caa6f77 100644 --- a/torchvision/prototype/transforms/functional/_meta.py +++ b/torchvision/prototype/transforms/functional/_meta.py @@ -39,10 +39,13 @@ def _xyxy_to_cxcywh(xyxy: torch.Tensor) -> torch.Tensor: def convert_bounding_box_format( - bounding_box: torch.Tensor, *, old_format: BoundingBoxFormat, new_format: BoundingBoxFormat + bounding_box: torch.Tensor, old_format: BoundingBoxFormat, new_format: BoundingBoxFormat, copy: bool = False ) -> torch.Tensor: if new_format == old_format: - return bounding_box.clone() + if copy: + return bounding_box.clone() + else: + return bounding_box if old_format == BoundingBoxFormat.XYWH: bounding_box = _xywh_to_xyxy(bounding_box) @@ -64,10 +67,13 @@ def _grayscale_to_rgb_tensor(grayscale: torch.Tensor) -> torch.Tensor: def convert_image_color_space_tensor( - image: torch.Tensor, old_color_space: ColorSpace, new_color_space: ColorSpace + image: torch.Tensor, old_color_space: ColorSpace, new_color_space: ColorSpace, copy: bool = False ) -> torch.Tensor: if new_color_space == old_color_space: - return image.clone() + if copy: + return image.clone() + else: + return image if old_color_space == ColorSpace.GRAYSCALE: image = _grayscale_to_rgb_tensor(image) @@ -83,10 +89,13 @@ def _grayscale_to_rgb_pil(grayscale: PIL.Image.Image) -> PIL.Image.Image: def convert_image_color_space_pil( - image: PIL.Image.Image, old_color_space: ColorSpace, new_color_space: ColorSpace + image: PIL.Image.Image, old_color_space: ColorSpace, new_color_space: ColorSpace, copy: bool = False ) -> PIL.Image.Image: if new_color_space == old_color_space: - return image.copy() + if copy: + return image.copy() + else: + return image if old_color_space == ColorSpace.GRAYSCALE: image = _grayscale_to_rgb_pil(image) From f4e7de52e5a0c16b834ede12fb2c01a498d50206 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 7 Mar 2022 16:22:27 +0100 Subject: [PATCH 2/7] copy by default --- torchvision/prototype/transforms/functional/_geometry.py | 8 ++++---- torchvision/prototype/transforms/functional/_meta.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py index 583d7d23e57..1328842d74e 100644 --- a/torchvision/prototype/transforms/functional/_geometry.py +++ 
b/torchvision/prototype/transforms/functional/_geometry.py @@ -21,13 +21,13 @@ def horizontal_flip_bounding_box( shape = bounding_box.shape bounding_box = convert_bounding_box_format( - bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY, copy=True + bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY ).view(-1, 4) bounding_box[:, [0, 2]] = image_size[1] - bounding_box[:, [2, 0]] return convert_bounding_box_format( - bounding_box, old_format=features.BoundingBoxFormat.XYXY, new_format=format + bounding_box, old_format=features.BoundingBoxFormat.XYXY, new_format=format, copy=False ).view(shape) @@ -240,14 +240,14 @@ def pad_bounding_box( shape = bounding_box.shape bounding_box = convert_bounding_box_format( - bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY, copy=True + bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY ).view(-1, 4) bounding_box[:, 0::2] += left bounding_box[:, 1::2] += top return convert_bounding_box_format( - bounding_box, old_format=features.BoundingBoxFormat.XYXY, new_format=format + bounding_box, old_format=features.BoundingBoxFormat.XYXY, new_format=format, copy=False ).view(shape) diff --git a/torchvision/prototype/transforms/functional/_meta.py b/torchvision/prototype/transforms/functional/_meta.py index 4309caa6f77..d28577cd155 100644 --- a/torchvision/prototype/transforms/functional/_meta.py +++ b/torchvision/prototype/transforms/functional/_meta.py @@ -39,7 +39,7 @@ def _xyxy_to_cxcywh(xyxy: torch.Tensor) -> torch.Tensor: def convert_bounding_box_format( - bounding_box: torch.Tensor, old_format: BoundingBoxFormat, new_format: BoundingBoxFormat, copy: bool = False + bounding_box: torch.Tensor, old_format: BoundingBoxFormat, new_format: BoundingBoxFormat, copy: bool = True ) -> torch.Tensor: if new_format == old_format: if copy: @@ -67,7 +67,7 @@ def _grayscale_to_rgb_tensor(grayscale: torch.Tensor) -> torch.Tensor: def convert_image_color_space_tensor( - image: torch.Tensor, old_color_space: ColorSpace, new_color_space: ColorSpace, copy: bool = False + image: torch.Tensor, old_color_space: ColorSpace, new_color_space: ColorSpace, copy: bool = True ) -> torch.Tensor: if new_color_space == old_color_space: if copy: @@ -89,7 +89,7 @@ def _grayscale_to_rgb_pil(grayscale: PIL.Image.Image) -> PIL.Image.Image: def convert_image_color_space_pil( - image: PIL.Image.Image, old_color_space: ColorSpace, new_color_space: ColorSpace, copy: bool = False + image: PIL.Image.Image, old_color_space: ColorSpace, new_color_space: ColorSpace, copy: bool = True ) -> PIL.Image.Image: if new_color_space == old_color_space: if copy: From ce7e22ca715cffbfd56c5ff0e9f3255b15c28839 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 7 Mar 2022 16:28:55 +0100 Subject: [PATCH 3/7] move padding parsing to stable API --- .../transforms/functional/_geometry.py | 23 +---------- torchvision/transforms/functional_tensor.py | 38 ++++++++++--------- 2 files changed, 22 insertions(+), 39 deletions(-) diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py index 1328842d74e..60868761219 100644 --- a/torchvision/prototype/transforms/functional/_geometry.py +++ b/torchvision/prototype/transforms/functional/_geometry.py @@ -211,31 +211,10 @@ def rotate_image_pil( pad_image_pil = _FP.pad -# TODO: this was copy-pasted from _FT.pad. 
Use this if _FT.pad is actually defined here -def _parse_pad_padding(padding: List[int]) -> List[int]: - if isinstance(padding, int): - if torch.jit.is_scripting(): - # This maybe unreachable - raise ValueError("padding can't be an int while torchscripting, set it as a list [value, ]") - pad_left = pad_right = pad_top = pad_bottom = padding - elif len(padding) == 1: - pad_left = pad_right = pad_top = pad_bottom = padding[0] - elif len(padding) == 2: - pad_left = pad_right = padding[0] - pad_top = pad_bottom = padding[1] - else: - pad_left = padding[0] - pad_top = padding[1] - pad_right = padding[2] - pad_bottom = padding[3] - - return [pad_left, pad_right, pad_top, pad_bottom] - - def pad_bounding_box( bounding_box: torch.Tensor, padding: List[int], format: features.BoundingBoxFormat ) -> torch.Tensor: - left, _, top, _ = _parse_pad_padding(padding) + left, _, top, _ = _FT._parse_pad_padding(padding) shape = bounding_box.shape diff --git a/torchvision/transforms/functional_tensor.py b/torchvision/transforms/functional_tensor.py index 18b2c721f4e..c6e8e89cab6 100644 --- a/torchvision/transforms/functional_tensor.py +++ b/torchvision/transforms/functional_tensor.py @@ -353,6 +353,26 @@ def _pad_symmetric(img: Tensor, padding: List[int]) -> Tensor: raise RuntimeError("Symmetric padding of N-D tensors are not supported yet") +def _parse_pad_padding(padding: List[int]) -> List[int]: + if isinstance(padding, int): + if torch.jit.is_scripting(): + # This maybe unreachable + raise ValueError("padding can't be an int while torchscripting, set it as a list [value, ]") + pad_left = pad_right = pad_top = pad_bottom = padding + elif len(padding) == 1: + pad_left = pad_right = pad_top = pad_bottom = padding[0] + elif len(padding) == 2: + pad_left = pad_right = padding[0] + pad_top = pad_bottom = padding[1] + else: + pad_left = padding[0] + pad_top = padding[1] + pad_right = padding[2] + pad_bottom = padding[3] + + return [pad_left, pad_right, pad_top, pad_bottom] + + def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "constant") -> Tensor: _assert_image_tensor(img) @@ -372,23 +392,7 @@ def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "con if padding_mode not in ["constant", "edge", "reflect", "symmetric"]: raise ValueError("Padding mode should be either constant, edge, reflect or symmetric") - if isinstance(padding, int): - if torch.jit.is_scripting(): - # This maybe unreachable - raise ValueError("padding can't be an int while torchscripting, set it as a list [value, ]") - pad_left = pad_right = pad_top = pad_bottom = padding - elif len(padding) == 1: - pad_left = pad_right = pad_top = pad_bottom = padding[0] - elif len(padding) == 2: - pad_left = pad_right = padding[0] - pad_top = pad_bottom = padding[1] - else: - pad_left = padding[0] - pad_top = padding[1] - pad_right = padding[2] - pad_bottom = padding[3] - - p = [pad_left, pad_right, pad_top, pad_bottom] + p = _parse_pad_padding(padding) if padding_mode == "edge": # remap padding_mode str From d9c73b9a5be6a0ded9d2222e035d54934d0d7171 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 14 Mar 2022 09:46:51 +0100 Subject: [PATCH 4/7] merge zoom out kernels into transform --- torchvision/prototype/transforms/_geometry.py | 48 ++++++++++++++----- .../transforms/functional/__init__.py | 3 -- .../transforms/functional/_geometry.py | 43 ----------------- 3 files changed, 35 insertions(+), 59 deletions(-) diff --git a/torchvision/prototype/transforms/_geometry.py 
b/torchvision/prototype/transforms/_geometry.py index 2c47e48463e..cb9e51081fa 100644 --- a/torchvision/prototype/transforms/_geometry.py +++ b/torchvision/prototype/transforms/_geometry.py @@ -260,12 +260,12 @@ def apply_recursively(obj: Any) -> Any: class RandomZoomOut(Transform): def __init__( - self, fill: Optional[List[float]] = None, side_range: Tuple[float, float] = (1.0, 4.0), p: float = 0.5 + self, fill: Union[float, Sequence[float]] = 0.0, side_range: Tuple[float, float] = (1.0, 4.0), p: float = 0.5 ) -> None: super().__init__() if fill is None: - fill = [0.0, 0.0, 0.0] + fill = 0.0 self.fill = fill self.side_range = side_range @@ -276,7 +276,7 @@ def __init__( def _get_params(self, sample: Any) -> Dict[str, Any]: image = query_image(sample) - _, orig_h, orig_w = get_image_dimensions(image) + orig_c, orig_h, orig_w = get_image_dimensions(image) r = self.side_range[0] + torch.rand(1) * (self.side_range[1] - self.side_range[0]) canvas_width = int(orig_w * r) @@ -287,23 +287,45 @@ def _get_params(self, sample: Any) -> Dict[str, Any]: top = int((canvas_height - orig_h) * r[1]) right = canvas_width - (left + orig_w) bottom = canvas_height - (top + orig_h) + padding = [left, top, right, bottom] - return dict(left=left, top=top, right=right, bottom=bottom) + fill = self.fill + if not isinstance(fill, collections.abc.Sequence): + fill = [fill] * orig_c + + return dict(padding=padding, fill=fill) def _transform(self, input: Any, params: Dict[str, Any]) -> Any: - if isinstance(input, features.Image): - output = F.zoom_out_image_tensor(input, **params, fill=self.fill) - return features.Image.new_like(input, output) - elif isinstance(input, torch.Tensor) and not isinstance(input, features._Feature): - return F.zoom_out_image_tensor(input, **params, fill=self.fill) + if isinstance(input, features.Image) or is_simple_tensor(input): + # PyTorch's pad supports only integers on fill. 
So we need to overwrite the colour + output = F.pad_image_tensor(input, params["padding"], fill=0, padding_mode="constant") + + left, top, right, bottom = params["padding"] + fill = torch.tensor(params["fill"], dtype=input.dtype, device=input.device).to().view(-1, 1, 1) + + output[..., :top, :] = fill + output[..., :, :left] = fill + output[..., -bottom:, :] = fill + output[..., :, -right:] = fill + + if isinstance(input, features.Image): + output = features.Image.new_like(input, output) + + return output elif isinstance(input, PIL.Image.Image): - return F.zoom_out_image_pil(input, **params, fill=self.fill) + return F.pad_image_pil( + input, + params["padding"], + fill=tuple(int(v) if input.mode != "F" else v for v in params["fill"]), + padding_mode="constant", + ) elif isinstance(input, features.BoundingBox): - output = F.zoom_out_bounding_box(input, **params, format=input.format) + output = F.pad_bounding_box(input, params["padding"], format=input.format) + left, top, right, bottom = params["padding"] height, width = input.image_size - height += params["top"] + params["bottom"] - width += params["left"] + params["right"] + height += top + bottom + width += left + right return features.BoundingBox.new_like(input, output, image_size=(height, width)) else: diff --git a/torchvision/prototype/transforms/functional/__init__.py b/torchvision/prototype/transforms/functional/__init__.py index 8b6b132d52b..ed6e9989328 100644 --- a/torchvision/prototype/transforms/functional/__init__.py +++ b/torchvision/prototype/transforms/functional/__init__.py @@ -65,9 +65,6 @@ five_crop_image_pil, ten_crop_image_tensor, ten_crop_image_pil, - zoom_out_image_pil, - zoom_out_image_tensor, - zoom_out_bounding_box, ) from ._misc import normalize_image_tensor, gaussian_blur_image_tensor from ._type_conversion import decode_image_with_pil, decode_video_with_av, label_to_one_hot diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py index fc42066a7ef..1bff7a3f2e6 100644 --- a/torchvision/prototype/transforms/functional/_geometry.py +++ b/torchvision/prototype/transforms/functional/_geometry.py @@ -410,46 +410,3 @@ def ten_crop_image_pil(img: PIL.Image.Image, size: List[int], vertical_flip: boo tl_flip, tr_flip, bl_flip, br_flip, center_flip = five_crop_image_pil(img, size) return [tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip, br_flip, center_flip] - - -def zoom_out_image_tensor( - image: torch.Tensor, - left: int, - top: int, - right: int, - bottom: int, - fill: List[float] = (0.0,), # type: ignore[assignment] -) -> torch.Tensor: - num_channels, height, width = get_dimensions_image_tensor(image) - - # PyTorch's pad supports only integers on fill. 
So we need to overwrite the colour - output = pad_image_tensor(image, [left, top, right, bottom], fill=0, padding_mode="constant") - - if not isinstance(fill, (list, tuple)): - fill = [fill] * num_channels - fill = torch.tensor(fill).to().view(-1, 1, 1) - - output[..., :top, :] = fill - output[..., :, :left] = fill - output[..., (top + height) :, :] = fill - output[..., :, (left + width) :] = fill - - return output - - -def zoom_out_image_pil( - img: PIL.Image.Image, - left: int, - top: int, - right: int, - bottom: int, - fill: Optional[Union[float, List[float], Tuple[float, ...]]] = 0, -) -> PIL.Image.Image: - fill = tuple(int(v) for v in _FP._parse_fill(fill, img, name="fill")["fill"]) - return pad_image_pil(img, [left, top, right, bottom], fill=fill, padding_mode="constant") - - -def zoom_out_bounding_box( - bounding_box: torch.Tensor, left: int, top: int, right: int, bottom: int, format: features.BoundingBoxFormat -) -> torch.Tensor: - return pad_bounding_box(bounding_box, [left, top, right, bottom], format=format) From 0032ca22161f6b12288bcfa69cfbbb66099a071f Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 14 Mar 2022 09:59:39 +0100 Subject: [PATCH 5/7] lint --- torchvision/prototype/transforms/_geometry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/prototype/transforms/_geometry.py b/torchvision/prototype/transforms/_geometry.py index cb9e51081fa..1d83906e45d 100644 --- a/torchvision/prototype/transforms/_geometry.py +++ b/torchvision/prototype/transforms/_geometry.py @@ -1,7 +1,7 @@ import collections.abc import math import warnings -from typing import Any, Dict, List, Union, Sequence, Tuple, cast, Optional +from typing import Any, Dict, List, Union, Sequence, Tuple, cast import PIL.Image import torch From cb7025e79cbb65b610a040ae2db7d3bfa1ac7080 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 14 Mar 2022 11:52:22 +0100 Subject: [PATCH 6/7] fix filling if no padding was done --- torchvision/prototype/transforms/_geometry.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/torchvision/prototype/transforms/_geometry.py b/torchvision/prototype/transforms/_geometry.py index 1d83906e45d..6e3bdadfb6b 100644 --- a/torchvision/prototype/transforms/_geometry.py +++ b/torchvision/prototype/transforms/_geometry.py @@ -303,10 +303,13 @@ def _transform(self, input: Any, params: Dict[str, Any]) -> Any: left, top, right, bottom = params["padding"] fill = torch.tensor(params["fill"], dtype=input.dtype, device=input.device).to().view(-1, 1, 1) + # TODO: only perform this if padding is actually needed, e.g `top > 0` output[..., :top, :] = fill output[..., :, :left] = fill - output[..., -bottom:, :] = fill - output[..., :, -right:] = fill + _, height, width = get_image_dimensions(input) + # TODO: use negative indexing, e.g. 
-bottom instead of (top + height) if the check above is implemented + output[..., (top + height) :, :] = fill + output[..., :, (left + width) :] = fill if isinstance(input, features.Image): output = features.Image.new_like(input, output) From b3a0acddaca06774e0fab7b834e72e5daf2abcb8 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 14 Mar 2022 13:59:11 +0100 Subject: [PATCH 7/7] resolve todo --- torchvision/prototype/transforms/_geometry.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/torchvision/prototype/transforms/_geometry.py b/torchvision/prototype/transforms/_geometry.py index 6e3bdadfb6b..2a965959629 100644 --- a/torchvision/prototype/transforms/_geometry.py +++ b/torchvision/prototype/transforms/_geometry.py @@ -303,13 +303,14 @@ def _transform(self, input: Any, params: Dict[str, Any]) -> Any: left, top, right, bottom = params["padding"] fill = torch.tensor(params["fill"], dtype=input.dtype, device=input.device).to().view(-1, 1, 1) - # TODO: only perform this if padding is actually needed, e.g `top > 0` - output[..., :top, :] = fill - output[..., :, :left] = fill - _, height, width = get_image_dimensions(input) - # TODO: use negative indexing, e.g. -bottom instead of (top + height) if the check above is implemented - output[..., (top + height) :, :] = fill - output[..., :, (left + width) :] = fill + if top > 0: + output[..., :top, :] = fill + if left > 0: + output[..., :, :left] = fill + if bottom > 0: + output[..., -bottom:, :] = fill + if right > 0: + output[..., :, -right:] = fill if isinstance(input, features.Image): output = features.Image.new_like(input, output)
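
For reference, below is a minimal usage sketch of the RandomZoomOut transform added by this series. It is not part of any patch above; the sample data, the feature constructors, and the assumption that the prototype Transform base class maps _transform over a tuple of inputs are illustrative only, and the prototype API may change.

    import torch
    from torchvision.prototype import features, transforms

    # Defaults shown explicitly; p=1.0 forces the zoom-out so the effect is always visible.
    transform = transforms.RandomZoomOut(fill=0.0, side_range=(1.0, 4.0), p=1.0)

    # A 3-channel image and one XYXY bounding box on a 32x32 canvas (hypothetical data).
    image = features.Image(torch.rand(3, 32, 32))
    boxes = features.BoundingBox(
        torch.tensor([[4.0, 4.0, 28.0, 28.0]]),
        format=features.BoundingBoxFormat.XYXY,
        image_size=(32, 32),
    )

    zoomed_image, zoomed_boxes = transform(image, boxes)

    # The image is padded onto a canvas between 1x and 4x the original size and the
    # padded border is filled with `fill`; the box is shifted by the left/top padding
    # and its image_size is updated to the new canvas size.
    print(zoomed_image.shape)       # torch.Size([3, H, W]) with H, W between 32 and 128
    print(zoomed_boxes.image_size)  # equals the padded canvas size (H, W)

Note that after patch 4 there are no standalone zoom_out_* kernels: the padding and fill logic lives in RandomZoomOut._transform itself, built on the public pad_image_tensor, pad_image_pil, and pad_bounding_box kernels.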