From d51eb7c0eb876d161fab58905999288983b4c29c Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Fri, 4 Mar 2022 15:02:56 +0100 Subject: [PATCH 1/7] port RandomZoomOut from detection references to prototype transforms --- .../prototype/features/_bounding_box.py | 2 +- torchvision/prototype/transforms/__init__.py | 2 +- torchvision/prototype/transforms/_geometry.py | 61 ++++++++++++- .../transforms/functional/__init__.py | 4 + .../transforms/functional/_geometry.py | 86 ++++++++++++++++++- .../prototype/transforms/functional/_meta.py | 21 +++-- 6 files changed, 166 insertions(+), 10 deletions(-) diff --git a/torchvision/prototype/features/_bounding_box.py b/torchvision/prototype/features/_bounding_box.py index fbf66b80efe..cd5cdc69836 100644 --- a/torchvision/prototype/features/_bounding_box.py +++ b/torchvision/prototype/features/_bounding_box.py @@ -64,7 +64,7 @@ def to_format(self, format: Union[str, BoundingBoxFormat]) -> BoundingBox: from torchvision.prototype.transforms.functional import convert_bounding_box_format if isinstance(format, str): - format = BoundingBoxFormat[format] + format = BoundingBoxFormat.from_str(format.upper()) return BoundingBox.new_like( self, convert_bounding_box_format(self, old_format=self.format, new_format=format), format=format diff --git a/torchvision/prototype/transforms/__init__.py b/torchvision/prototype/transforms/__init__.py index 98ad7ae0d74..ce1de6a293a 100644 --- a/torchvision/prototype/transforms/__init__.py +++ b/torchvision/prototype/transforms/__init__.py @@ -7,7 +7,7 @@ from ._augment import RandomErasing, RandomMixup, RandomCutmix from ._auto_augment import RandAugment, TrivialAugmentWide, AutoAugment, AugMix from ._container import Compose, RandomApply, RandomChoice, RandomOrder -from ._geometry import HorizontalFlip, Resize, CenterCrop, RandomResizedCrop +from ._geometry import HorizontalFlip, Resize, CenterCrop, RandomResizedCrop, RandomZoomOut from ._meta import ConvertBoundingBoxFormat, ConvertImageDtype, ConvertImageColorSpace from ._misc import Identity, Normalize, ToDtype, Lambda from ._presets import CocoEval, ImageNetEval, VocEval, Kinect400Eval, RaftEval diff --git a/torchvision/prototype/transforms/_geometry.py b/torchvision/prototype/transforms/_geometry.py index 6f4f7a6cb4d..b478e82d821 100644 --- a/torchvision/prototype/transforms/_geometry.py +++ b/torchvision/prototype/transforms/_geometry.py @@ -1,6 +1,6 @@ import math import warnings -from typing import Any, Dict, List, Union, Sequence, Tuple, cast +from typing import Any, Dict, List, Union, Sequence, Tuple, cast, Optional import PIL.Image import torch @@ -168,3 +168,62 @@ def forward(self, *inputs: Any) -> Any: if has_any(sample, features.BoundingBox, features.SegmentationMask): raise TypeError(f"BoundingBox'es and SegmentationMask's are not supported by {type(self).__name__}()") return super().forward(sample) + + +class RandomZoomOut(Transform): + def __init__( + self, fill: Optional[List[float]] = None, side_range: Tuple[float, float] = (1.0, 4.0), p: float = 0.5 + ) -> None: + super().__init__() + + if fill is None: + fill = [0.0, 0.0, 0.0] + self.fill = fill + + self.side_range = side_range + if side_range[0] < 1.0 or side_range[0] > side_range[1]: + raise ValueError(f"Invalid canvas side range provided {side_range}.") + + self.p = p + + def _get_params(self, sample: Any) -> Dict[str, Any]: + image = query_image(sample) + _, orig_h, orig_w = get_image_dimensions(image) + + r = self.side_range[0] + torch.rand(1) * (self.side_range[1] - self.side_range[0]) + canvas_width 
= int(orig_w * r) + canvas_height = int(orig_h * r) + + r = torch.rand(2) + left = int((canvas_width - orig_w) * r[0]) + top = int((canvas_height - orig_h) * r[1]) + right = canvas_width - (left + orig_w) + bottom = canvas_height - (top + orig_h) + + return dict(left=left, top=top, right=right, bottom=bottom) + + def _transform(self, input: Any, params: Dict[str, Any]) -> Any: + if isinstance(input, features.Image): + output = F.zoom_out_image_tensor(input, **params, fill=self.fill) + return features.Image.new_like(input, output) + elif isinstance(input, torch.Tensor) and not isinstance(input, features._Feature): + return F.zoom_out_image_tensor(input, **params, fill=self.fill) + elif isinstance(input, PIL.Image.Image): + return F.zoom_out_image_pil(input, **params, fill=self.fill) + elif isinstance(input, features.BoundingBox): + output = F.zoom_out_bounding_box(input, **params, format=input.format) + + height, width = input.image_size + height += params["top"] + params["bottom"] + width += params["left"] + params["right"] + + return features.BoundingBox.new_like(input, output, image_size=(height, width)) + else: + return input + + def forward(self, *inputs: Any) -> Any: + sample = inputs if len(inputs) > 1 else inputs[0] + if torch.rand(1) >= self.p: + return sample + + return super().forward(sample) diff --git a/torchvision/prototype/transforms/functional/__init__.py b/torchvision/prototype/transforms/functional/__init__.py index e3fe60a7919..f91b61fec2c 100644 --- a/torchvision/prototype/transforms/functional/__init__.py +++ b/torchvision/prototype/transforms/functional/__init__.py @@ -54,12 +54,16 @@ rotate_image_pil, pad_image_tensor, pad_image_pil, + pad_bounding_box, crop_image_tensor, crop_image_pil, perspective_image_tensor, perspective_image_pil, vertical_flip_image_tensor, vertical_flip_image_pil, + zoom_out_image_pil, + zoom_out_image_tensor, + zoom_out_bounding_box, ) from ._misc import normalize_image_tensor, gaussian_blur_image_tensor from ._type_conversion import decode_image_with_pil, decode_video_with_av, label_to_one_hot diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py index 080fe5da891..337753431a5 100644 --- a/torchvision/prototype/transforms/functional/_geometry.py +++ b/torchvision/prototype/transforms/functional/_geometry.py @@ -21,7 +21,7 @@ def horizontal_flip_bounding_box( shape = bounding_box.shape bounding_box = convert_bounding_box_format( - bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY + bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY, copy=True ).view(-1, 4) bounding_box[:, [0, 2]] = image_size[1] - bounding_box[:, [2, 0]] @@ -210,6 +210,47 @@ def rotate_image_pil( pad_image_tensor = _FT.pad pad_image_pil = _FP.pad + +# TODO: this was copy-pasted from _FT.pad. 
Use this if _FT.pad is actually defined here +def _parse_pad_padding(padding: List[int]) -> List[int]: + if isinstance(padding, int): + if torch.jit.is_scripting(): + # This maybe unreachable + raise ValueError("padding can't be an int while torchscripting, set it as a list [value, ]") + pad_left = pad_right = pad_top = pad_bottom = padding + elif len(padding) == 1: + pad_left = pad_right = pad_top = pad_bottom = padding[0] + elif len(padding) == 2: + pad_left = pad_right = padding[0] + pad_top = pad_bottom = padding[1] + else: + pad_left = padding[0] + pad_top = padding[1] + pad_right = padding[2] + pad_bottom = padding[3] + + return [pad_left, pad_right, pad_top, pad_bottom] + + +def pad_bounding_box( + bounding_box: torch.Tensor, padding: List[int], format: features.BoundingBoxFormat +) -> torch.Tensor: + left, _, top, _ = _parse_pad_padding(padding) + + shape = bounding_box.shape + + bounding_box = convert_bounding_box_format( + bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY, copy=True + ).view(-1, 4) + + bounding_box[:, 0::2] += left + bounding_box[:, 1::2] += top + + return convert_bounding_box_format( + bounding_box, old_format=features.BoundingBoxFormat.XYXY, new_format=format + ).view(shape) + + crop_image_tensor = _FT.crop crop_image_pil = _FP.crop @@ -314,3 +355,46 @@ def resized_crop_image_pil( ) -> PIL.Image.Image: img = crop_image_pil(img, top, left, height, width) return resize_image_pil(img, size, interpolation=interpolation) + + +def zoom_out_image_tensor( + image: torch.Tensor, + left: int, + top: int, + right: int, + bottom: int, + fill: List[float] = (0.0,), # type: ignore[assignment] +) -> torch.Tensor: + num_channels, height, width = get_dimensions_image_tensor(image) + + # PyTorch's pad supports only integers on fill. 
So we need to overwrite the colour + output = pad_image_tensor(image, [left, top, right, bottom], fill=0, padding_mode="constant") + + if not isinstance(fill, (list, tuple)): + fill = [fill] * num_channels + fill = torch.tensor(fill).to().view(-1, 1, 1) + + output[..., :top, :] = fill + output[..., :, :left] = fill + output[..., (top + height) :, :] = fill + output[..., :, (left + width) :] = fill + + return output + + +def zoom_out_image_pil( + img: PIL.Image.Image, + left: int, + top: int, + right: int, + bottom: int, + fill: Optional[Union[float, List[float], Tuple[float, ...]]] = 0, +) -> PIL.Image.Image: + fill = tuple(int(v) for v in _FP._parse_fill(fill, img, name="fill")["fill"]) + return pad_image_pil(img, [left, top, right, bottom], fill=fill, padding_mode="constant") + + +def zoom_out_bounding_box( + bounding_box: torch.Tensor, left: int, top: int, right: int, bottom: int, format: features.BoundingBoxFormat +) -> torch.Tensor: + return pad_bounding_box(bounding_box, [left, top, right, bottom], format=format) diff --git a/torchvision/prototype/transforms/functional/_meta.py b/torchvision/prototype/transforms/functional/_meta.py index 5062c266959..4309caa6f77 100644 --- a/torchvision/prototype/transforms/functional/_meta.py +++ b/torchvision/prototype/transforms/functional/_meta.py @@ -39,10 +39,13 @@ def _xyxy_to_cxcywh(xyxy: torch.Tensor) -> torch.Tensor: def convert_bounding_box_format( - bounding_box: torch.Tensor, *, old_format: BoundingBoxFormat, new_format: BoundingBoxFormat + bounding_box: torch.Tensor, old_format: BoundingBoxFormat, new_format: BoundingBoxFormat, copy: bool = False ) -> torch.Tensor: if new_format == old_format: - return bounding_box.clone() + if copy: + return bounding_box.clone() + else: + return bounding_box if old_format == BoundingBoxFormat.XYWH: bounding_box = _xywh_to_xyxy(bounding_box) @@ -64,10 +67,13 @@ def _grayscale_to_rgb_tensor(grayscale: torch.Tensor) -> torch.Tensor: def convert_image_color_space_tensor( - image: torch.Tensor, old_color_space: ColorSpace, new_color_space: ColorSpace + image: torch.Tensor, old_color_space: ColorSpace, new_color_space: ColorSpace, copy: bool = False ) -> torch.Tensor: if new_color_space == old_color_space: - return image.clone() + if copy: + return image.clone() + else: + return image if old_color_space == ColorSpace.GRAYSCALE: image = _grayscale_to_rgb_tensor(image) @@ -83,10 +89,13 @@ def _grayscale_to_rgb_pil(grayscale: PIL.Image.Image) -> PIL.Image.Image: def convert_image_color_space_pil( - image: PIL.Image.Image, old_color_space: ColorSpace, new_color_space: ColorSpace + image: PIL.Image.Image, old_color_space: ColorSpace, new_color_space: ColorSpace, copy: bool = False ) -> PIL.Image.Image: if new_color_space == old_color_space: - return image.copy() + if copy: + return image.copy() + else: + return image if old_color_space == ColorSpace.GRAYSCALE: image = _grayscale_to_rgb_pil(image) From f4e7de52e5a0c16b834ede12fb2c01a498d50206 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 7 Mar 2022 16:22:27 +0100 Subject: [PATCH 2/7] copy by default --- torchvision/prototype/transforms/functional/_geometry.py | 8 ++++---- torchvision/prototype/transforms/functional/_meta.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py index 583d7d23e57..1328842d74e 100644 --- a/torchvision/prototype/transforms/functional/_geometry.py +++ 
b/torchvision/prototype/transforms/functional/_geometry.py @@ -21,13 +21,13 @@ def horizontal_flip_bounding_box( shape = bounding_box.shape bounding_box = convert_bounding_box_format( - bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY, copy=True + bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY ).view(-1, 4) bounding_box[:, [0, 2]] = image_size[1] - bounding_box[:, [2, 0]] return convert_bounding_box_format( - bounding_box, old_format=features.BoundingBoxFormat.XYXY, new_format=format + bounding_box, old_format=features.BoundingBoxFormat.XYXY, new_format=format, copy=False ).view(shape) @@ -240,14 +240,14 @@ def pad_bounding_box( shape = bounding_box.shape bounding_box = convert_bounding_box_format( - bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY, copy=True + bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY ).view(-1, 4) bounding_box[:, 0::2] += left bounding_box[:, 1::2] += top return convert_bounding_box_format( - bounding_box, old_format=features.BoundingBoxFormat.XYXY, new_format=format + bounding_box, old_format=features.BoundingBoxFormat.XYXY, new_format=format, copy=False ).view(shape) diff --git a/torchvision/prototype/transforms/functional/_meta.py b/torchvision/prototype/transforms/functional/_meta.py index 4309caa6f77..d28577cd155 100644 --- a/torchvision/prototype/transforms/functional/_meta.py +++ b/torchvision/prototype/transforms/functional/_meta.py @@ -39,7 +39,7 @@ def _xyxy_to_cxcywh(xyxy: torch.Tensor) -> torch.Tensor: def convert_bounding_box_format( - bounding_box: torch.Tensor, old_format: BoundingBoxFormat, new_format: BoundingBoxFormat, copy: bool = False + bounding_box: torch.Tensor, old_format: BoundingBoxFormat, new_format: BoundingBoxFormat, copy: bool = True ) -> torch.Tensor: if new_format == old_format: if copy: @@ -67,7 +67,7 @@ def _grayscale_to_rgb_tensor(grayscale: torch.Tensor) -> torch.Tensor: def convert_image_color_space_tensor( - image: torch.Tensor, old_color_space: ColorSpace, new_color_space: ColorSpace, copy: bool = False + image: torch.Tensor, old_color_space: ColorSpace, new_color_space: ColorSpace, copy: bool = True ) -> torch.Tensor: if new_color_space == old_color_space: if copy: @@ -89,7 +89,7 @@ def _grayscale_to_rgb_pil(grayscale: PIL.Image.Image) -> PIL.Image.Image: def convert_image_color_space_pil( - image: PIL.Image.Image, old_color_space: ColorSpace, new_color_space: ColorSpace, copy: bool = False + image: PIL.Image.Image, old_color_space: ColorSpace, new_color_space: ColorSpace, copy: bool = True ) -> PIL.Image.Image: if new_color_space == old_color_space: if copy: From ce7e22ca715cffbfd56c5ff0e9f3255b15c28839 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 7 Mar 2022 16:28:55 +0100 Subject: [PATCH 3/7] move padding parsing to stable API --- .../transforms/functional/_geometry.py | 23 +---------- torchvision/transforms/functional_tensor.py | 38 ++++++++++--------- 2 files changed, 22 insertions(+), 39 deletions(-) diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py index 1328842d74e..60868761219 100644 --- a/torchvision/prototype/transforms/functional/_geometry.py +++ b/torchvision/prototype/transforms/functional/_geometry.py @@ -211,31 +211,10 @@ def rotate_image_pil( pad_image_pil = _FP.pad -# TODO: this was copy-pasted from _FT.pad. 
Use this if _FT.pad is actually defined here -def _parse_pad_padding(padding: List[int]) -> List[int]: - if isinstance(padding, int): - if torch.jit.is_scripting(): - # This maybe unreachable - raise ValueError("padding can't be an int while torchscripting, set it as a list [value, ]") - pad_left = pad_right = pad_top = pad_bottom = padding - elif len(padding) == 1: - pad_left = pad_right = pad_top = pad_bottom = padding[0] - elif len(padding) == 2: - pad_left = pad_right = padding[0] - pad_top = pad_bottom = padding[1] - else: - pad_left = padding[0] - pad_top = padding[1] - pad_right = padding[2] - pad_bottom = padding[3] - - return [pad_left, pad_right, pad_top, pad_bottom] - - def pad_bounding_box( bounding_box: torch.Tensor, padding: List[int], format: features.BoundingBoxFormat ) -> torch.Tensor: - left, _, top, _ = _parse_pad_padding(padding) + left, _, top, _ = _FT._parse_pad_padding(padding) shape = bounding_box.shape diff --git a/torchvision/transforms/functional_tensor.py b/torchvision/transforms/functional_tensor.py index 18b2c721f4e..c6e8e89cab6 100644 --- a/torchvision/transforms/functional_tensor.py +++ b/torchvision/transforms/functional_tensor.py @@ -353,6 +353,26 @@ def _pad_symmetric(img: Tensor, padding: List[int]) -> Tensor: raise RuntimeError("Symmetric padding of N-D tensors are not supported yet") +def _parse_pad_padding(padding: List[int]) -> List[int]: + if isinstance(padding, int): + if torch.jit.is_scripting(): + # This maybe unreachable + raise ValueError("padding can't be an int while torchscripting, set it as a list [value, ]") + pad_left = pad_right = pad_top = pad_bottom = padding + elif len(padding) == 1: + pad_left = pad_right = pad_top = pad_bottom = padding[0] + elif len(padding) == 2: + pad_left = pad_right = padding[0] + pad_top = pad_bottom = padding[1] + else: + pad_left = padding[0] + pad_top = padding[1] + pad_right = padding[2] + pad_bottom = padding[3] + + return [pad_left, pad_right, pad_top, pad_bottom] + + def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "constant") -> Tensor: _assert_image_tensor(img) @@ -372,23 +392,7 @@ def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "con if padding_mode not in ["constant", "edge", "reflect", "symmetric"]: raise ValueError("Padding mode should be either constant, edge, reflect or symmetric") - if isinstance(padding, int): - if torch.jit.is_scripting(): - # This maybe unreachable - raise ValueError("padding can't be an int while torchscripting, set it as a list [value, ]") - pad_left = pad_right = pad_top = pad_bottom = padding - elif len(padding) == 1: - pad_left = pad_right = pad_top = pad_bottom = padding[0] - elif len(padding) == 2: - pad_left = pad_right = padding[0] - pad_top = pad_bottom = padding[1] - else: - pad_left = padding[0] - pad_top = padding[1] - pad_right = padding[2] - pad_bottom = padding[3] - - p = [pad_left, pad_right, pad_top, pad_bottom] + p = _parse_pad_padding(padding) if padding_mode == "edge": # remap padding_mode str From d9c73b9a5be6a0ded9d2222e035d54934d0d7171 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 14 Mar 2022 09:46:51 +0100 Subject: [PATCH 4/7] merge zoom out kernels into transform --- torchvision/prototype/transforms/_geometry.py | 48 ++++++++++++++----- .../transforms/functional/__init__.py | 3 -- .../transforms/functional/_geometry.py | 43 ----------------- 3 files changed, 35 insertions(+), 59 deletions(-) diff --git a/torchvision/prototype/transforms/_geometry.py 
b/torchvision/prototype/transforms/_geometry.py index 2c47e48463e..cb9e51081fa 100644 --- a/torchvision/prototype/transforms/_geometry.py +++ b/torchvision/prototype/transforms/_geometry.py @@ -260,12 +260,12 @@ def apply_recursively(obj: Any) -> Any: class RandomZoomOut(Transform): def __init__( - self, fill: Optional[List[float]] = None, side_range: Tuple[float, float] = (1.0, 4.0), p: float = 0.5 + self, fill: Union[float, Sequence[float]] = 0.0, side_range: Tuple[float, float] = (1.0, 4.0), p: float = 0.5 ) -> None: super().__init__() if fill is None: - fill = [0.0, 0.0, 0.0] + fill = 0.0 self.fill = fill self.side_range = side_range @@ -276,7 +276,7 @@ def __init__( def _get_params(self, sample: Any) -> Dict[str, Any]: image = query_image(sample) - _, orig_h, orig_w = get_image_dimensions(image) + orig_c, orig_h, orig_w = get_image_dimensions(image) r = self.side_range[0] + torch.rand(1) * (self.side_range[1] - self.side_range[0]) canvas_width = int(orig_w * r) @@ -287,23 +287,45 @@ def _get_params(self, sample: Any) -> Dict[str, Any]: top = int((canvas_height - orig_h) * r[1]) right = canvas_width - (left + orig_w) bottom = canvas_height - (top + orig_h) + padding = [left, top, right, bottom] - return dict(left=left, top=top, right=right, bottom=bottom) + fill = self.fill + if not isinstance(fill, collections.abc.Sequence): + fill = [fill] * orig_c + + return dict(padding=padding, fill=fill) def _transform(self, input: Any, params: Dict[str, Any]) -> Any: - if isinstance(input, features.Image): - output = F.zoom_out_image_tensor(input, **params, fill=self.fill) - return features.Image.new_like(input, output) - elif isinstance(input, torch.Tensor) and not isinstance(input, features._Feature): - return F.zoom_out_image_tensor(input, **params, fill=self.fill) + if isinstance(input, features.Image) or is_simple_tensor(input): + # PyTorch's pad supports only integers on fill. 
So we need to overwrite the colour + output = F.pad_image_tensor(input, params["padding"], fill=0, padding_mode="constant") + + left, top, right, bottom = params["padding"] + fill = torch.tensor(params["fill"], dtype=input.dtype, device=input.device).to().view(-1, 1, 1) + + output[..., :top, :] = fill + output[..., :, :left] = fill + output[..., -bottom:, :] = fill + output[..., :, -right:] = fill + + if isinstance(input, features.Image): + output = features.Image.new_like(input, output) + + return output elif isinstance(input, PIL.Image.Image): - return F.zoom_out_image_pil(input, **params, fill=self.fill) + return F.pad_image_pil( + input, + params["padding"], + fill=tuple(int(v) if input.mode != "F" else v for v in params["fill"]), + padding_mode="constant", + ) elif isinstance(input, features.BoundingBox): - output = F.zoom_out_bounding_box(input, **params, format=input.format) + output = F.pad_bounding_box(input, params["padding"], format=input.format) + left, top, right, bottom = params["padding"] height, width = input.image_size - height += params["top"] + params["bottom"] - width += params["left"] + params["right"] + height += top + bottom + width += left + right return features.BoundingBox.new_like(input, output, image_size=(height, width)) else: diff --git a/torchvision/prototype/transforms/functional/__init__.py b/torchvision/prototype/transforms/functional/__init__.py index 8b6b132d52b..ed6e9989328 100644 --- a/torchvision/prototype/transforms/functional/__init__.py +++ b/torchvision/prototype/transforms/functional/__init__.py @@ -65,9 +65,6 @@ five_crop_image_pil, ten_crop_image_tensor, ten_crop_image_pil, - zoom_out_image_pil, - zoom_out_image_tensor, - zoom_out_bounding_box, ) from ._misc import normalize_image_tensor, gaussian_blur_image_tensor from ._type_conversion import decode_image_with_pil, decode_video_with_av, label_to_one_hot diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py index fc42066a7ef..1bff7a3f2e6 100644 --- a/torchvision/prototype/transforms/functional/_geometry.py +++ b/torchvision/prototype/transforms/functional/_geometry.py @@ -410,46 +410,3 @@ def ten_crop_image_pil(img: PIL.Image.Image, size: List[int], vertical_flip: boo tl_flip, tr_flip, bl_flip, br_flip, center_flip = five_crop_image_pil(img, size) return [tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip, br_flip, center_flip] - - -def zoom_out_image_tensor( - image: torch.Tensor, - left: int, - top: int, - right: int, - bottom: int, - fill: List[float] = (0.0,), # type: ignore[assignment] -) -> torch.Tensor: - num_channels, height, width = get_dimensions_image_tensor(image) - - # PyTorch's pad supports only integers on fill. 
So we need to overwrite the colour - output = pad_image_tensor(image, [left, top, right, bottom], fill=0, padding_mode="constant") - - if not isinstance(fill, (list, tuple)): - fill = [fill] * num_channels - fill = torch.tensor(fill).to().view(-1, 1, 1) - - output[..., :top, :] = fill - output[..., :, :left] = fill - output[..., (top + height) :, :] = fill - output[..., :, (left + width) :] = fill - - return output - - -def zoom_out_image_pil( - img: PIL.Image.Image, - left: int, - top: int, - right: int, - bottom: int, - fill: Optional[Union[float, List[float], Tuple[float, ...]]] = 0, -) -> PIL.Image.Image: - fill = tuple(int(v) for v in _FP._parse_fill(fill, img, name="fill")["fill"]) - return pad_image_pil(img, [left, top, right, bottom], fill=fill, padding_mode="constant") - - -def zoom_out_bounding_box( - bounding_box: torch.Tensor, left: int, top: int, right: int, bottom: int, format: features.BoundingBoxFormat -) -> torch.Tensor: - return pad_bounding_box(bounding_box, [left, top, right, bottom], format=format) From 0032ca22161f6b12288bcfa69cfbbb66099a071f Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 14 Mar 2022 09:59:39 +0100 Subject: [PATCH 5/7] lint --- torchvision/prototype/transforms/_geometry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/prototype/transforms/_geometry.py b/torchvision/prototype/transforms/_geometry.py index cb9e51081fa..1d83906e45d 100644 --- a/torchvision/prototype/transforms/_geometry.py +++ b/torchvision/prototype/transforms/_geometry.py @@ -1,7 +1,7 @@ import collections.abc import math import warnings -from typing import Any, Dict, List, Union, Sequence, Tuple, cast, Optional +from typing import Any, Dict, List, Union, Sequence, Tuple, cast import PIL.Image import torch From cb7025e79cbb65b610a040ae2db7d3bfa1ac7080 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 14 Mar 2022 11:52:22 +0100 Subject: [PATCH 6/7] fix filling if no padding was done --- torchvision/prototype/transforms/_geometry.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/torchvision/prototype/transforms/_geometry.py b/torchvision/prototype/transforms/_geometry.py index 1d83906e45d..6e3bdadfb6b 100644 --- a/torchvision/prototype/transforms/_geometry.py +++ b/torchvision/prototype/transforms/_geometry.py @@ -303,10 +303,13 @@ def _transform(self, input: Any, params: Dict[str, Any]) -> Any: left, top, right, bottom = params["padding"] fill = torch.tensor(params["fill"], dtype=input.dtype, device=input.device).to().view(-1, 1, 1) + # TODO: only perform this if padding is actually needed, e.g `top > 0` output[..., :top, :] = fill output[..., :, :left] = fill - output[..., -bottom:, :] = fill - output[..., :, -right:] = fill + _, height, width = get_image_dimensions(input) + # TODO: use negative indexing, e.g. 
-bottom instead of (top + height) if the check above is implemented + output[..., (top + height) :, :] = fill + output[..., :, (left + width) :] = fill if isinstance(input, features.Image): output = features.Image.new_like(input, output) From b3a0acddaca06774e0fab7b834e72e5daf2abcb8 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 14 Mar 2022 13:59:11 +0100 Subject: [PATCH 7/7] resolve todo --- torchvision/prototype/transforms/_geometry.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/torchvision/prototype/transforms/_geometry.py b/torchvision/prototype/transforms/_geometry.py index 6e3bdadfb6b..2a965959629 100644 --- a/torchvision/prototype/transforms/_geometry.py +++ b/torchvision/prototype/transforms/_geometry.py @@ -303,13 +303,14 @@ def _transform(self, input: Any, params: Dict[str, Any]) -> Any: left, top, right, bottom = params["padding"] fill = torch.tensor(params["fill"], dtype=input.dtype, device=input.device).to().view(-1, 1, 1) - # TODO: only perform this if padding is actually needed, e.g `top > 0` - output[..., :top, :] = fill - output[..., :, :left] = fill - _, height, width = get_image_dimensions(input) - # TODO: use negative indexing, e.g. -bottom instead of (top + height) if the check above is implemented - output[..., (top + height) :, :] = fill - output[..., :, (left + width) :] = fill + if top > 0: + output[..., :top, :] = fill + if left > 0: + output[..., :, :left] = fill + if bottom > 0: + output[..., -bottom:, :] = fill + if right > 0: + output[..., :, -right:] = fill if isinstance(input, features.Image): output = features.Image.new_like(input, output)
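
For reference, below is a minimal usage sketch of the RandomZoomOut transform added by this series. It is not part of any patch above; the sample data, the feature constructors, and the assumption that the prototype Transform base class maps _transform over a tuple of inputs are illustrative only, and the prototype API may change.

    import torch
    from torchvision.prototype import features, transforms

    # Defaults shown explicitly; p=1.0 forces the zoom-out so the effect is always visible.
    transform = transforms.RandomZoomOut(fill=0.0, side_range=(1.0, 4.0), p=1.0)

    # A 3-channel image and one XYXY bounding box on a 32x32 canvas (hypothetical data).
    image = features.Image(torch.rand(3, 32, 32))
    boxes = features.BoundingBox(
        torch.tensor([[4.0, 4.0, 28.0, 28.0]]),
        format=features.BoundingBoxFormat.XYXY,
        image_size=(32, 32),
    )

    zoomed_image, zoomed_boxes = transform(image, boxes)

    # The image is padded onto a canvas between 1x and 4x the original size and the
    # padded border is filled with `fill`; the box is shifted by the left/top padding
    # and its image_size is updated to the new canvas size.
    print(zoomed_image.shape)       # torch.Size([3, H, W]) with H, W between 32 and 128
    print(zoomed_boxes.image_size)  # equals the padded canvas size (H, W)

Note that after patch 4 there are no standalone zoom_out_* kernels: the padding and fill logic lives in RandomZoomOut._transform itself, built on the public pad_image_tensor, pad_image_pil, and pad_bounding_box kernels.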