2 changes: 1 addition & 1 deletion torchvision/prototype/features/_bounding_box.py
@@ -64,7 +64,7 @@ def to_format(self, format: Union[str, BoundingBoxFormat]) -> BoundingBox:
from torchvision.prototype.transforms.functional import convert_bounding_box_format

if isinstance(format, str):
format = BoundingBoxFormat[format]
format = BoundingBoxFormat.from_str(format.upper())

return BoundingBox.new_like(
self, convert_bounding_box_format(self, old_format=self.format, new_format=format), format=format
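For context on the change above: a plain enum subscript (`BoundingBoxFormat[format]`) is case-sensitive, while the new lookup normalizes the string first. A minimal sketch of the difference, assuming the prototype's `BoundingBoxFormat` enum with an `XYXY` member as used in the diff:

```python
from torchvision.prototype.features import BoundingBoxFormat

BoundingBoxFormat["XYXY"]                   # old lookup: works
# BoundingBoxFormat["xyxy"]                 # old lookup: raises KeyError
BoundingBoxFormat.from_str("xyxy".upper())  # new lookup: caller upper-cases, so casing no longer matters
```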
11 changes: 10 additions & 1 deletion torchvision/prototype/transforms/__init__.py
@@ -7,7 +7,16 @@
from ._augment import RandomErasing, RandomMixup, RandomCutmix
from ._auto_augment import RandAugment, TrivialAugmentWide, AutoAugment, AugMix
from ._container import Compose, RandomApply, RandomChoice, RandomOrder
from ._geometry import HorizontalFlip, Resize, CenterCrop, RandomResizedCrop, FiveCrop, TenCrop, BatchMultiCrop
from ._geometry import (
HorizontalFlip,
Resize,
CenterCrop,
RandomResizedCrop,
FiveCrop,
TenCrop,
BatchMultiCrop,
RandomZoomOut,
)
from ._meta import ConvertBoundingBoxFormat, ConvertImageDtype, ConvertImageColorSpace
from ._misc import Identity, Normalize, ToDtype, Lambda
from ._presets import (
85 changes: 85 additions & 0 deletions torchvision/prototype/transforms/_geometry.py
@@ -256,3 +256,88 @@ def apply_recursively(obj: Any) -> Any:
return obj

return apply_recursively(inputs if len(inputs) > 1 else inputs[0])


class RandomZoomOut(Transform):
def __init__(
self, fill: Union[float, Sequence[float]] = 0.0, side_range: Tuple[float, float] = (1.0, 4.0), p: float = 0.5
) -> None:
super().__init__()

if fill is None:
fill = 0.0
self.fill = fill

self.side_range = side_range
if side_range[0] < 1.0 or side_range[0] > side_range[1]:
raise ValueError(f"Invalid canvas side range provided {side_range}.")

self.p = p

def _get_params(self, sample: Any) -> Dict[str, Any]:
image = query_image(sample)
orig_c, orig_h, orig_w = get_image_dimensions(image)

r = self.side_range[0] + torch.rand(1) * (self.side_range[1] - self.side_range[0])
canvas_width = int(orig_w * r)
canvas_height = int(orig_h * r)

r = torch.rand(2)
left = int((canvas_width - orig_w) * r[0])
top = int((canvas_height - orig_h) * r[1])
right = canvas_width - (left + orig_w)
bottom = canvas_height - (top + orig_h)
padding = [left, top, right, bottom]

fill = self.fill
if not isinstance(fill, collections.abc.Sequence):
fill = [fill] * orig_c

return dict(padding=padding, fill=fill)

def _transform(self, input: Any, params: Dict[str, Any]) -> Any:
if isinstance(input, features.Image) or is_simple_tensor(input):
            # The tensor pad kernel accepts only a single integer fill value, so pad with 0
            # here and overwrite the padded regions with the per-channel fill colour below.
output = F.pad_image_tensor(input, params["padding"], fill=0, padding_mode="constant")

left, top, right, bottom = params["padding"]
            fill = torch.tensor(params["fill"], dtype=input.dtype, device=input.device).view(-1, 1, 1)

if top > 0:
output[..., :top, :] = fill
if left > 0:
output[..., :, :left] = fill
if bottom > 0:
output[..., -bottom:, :] = fill
if right > 0:
output[..., :, -right:] = fill

if isinstance(input, features.Image):
output = features.Image.new_like(input, output)

return output
elif isinstance(input, PIL.Image.Image):
return F.pad_image_pil(
input,
params["padding"],
fill=tuple(int(v) if input.mode != "F" else v for v in params["fill"]),
padding_mode="constant",
)
elif isinstance(input, features.BoundingBox):
output = F.pad_bounding_box(input, params["padding"], format=input.format)

left, top, right, bottom = params["padding"]
height, width = input.image_size
height += top + bottom
width += left + right

return features.BoundingBox.new_like(input, output, image_size=(height, width))
else:
return input

def forward(self, *inputs: Any) -> Any:
sample = inputs if len(inputs) > 1 else inputs[0]
if torch.rand(1) >= self.p:
return sample

return super().forward(sample)
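A hedged usage sketch for the new transform (not part of the diff; it assumes the prototype `features` constructors behave as the code above expects):

```python
import torch
from torchvision.prototype import features, transforms

image = features.Image(torch.rand(3, 224, 224))
boxes = features.BoundingBox(
    torch.tensor([[10.0, 10.0, 50.0, 50.0]]),
    format=features.BoundingBoxFormat.XYXY,
    image_size=(224, 224),
)

# p=1.0 forces the zoom-out so the sketch always applies it.
transform = transforms.RandomZoomOut(fill=0.5, side_range=(1.0, 4.0), p=1.0)
zoomed_image, zoomed_boxes = transform(image, boxes)
# The canvas is scaled by a factor r drawn from side_range, the image lands at a
# random offset inside it, and the boxes are shifted by the same left/top padding.
```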
1 change: 1 addition & 0 deletions torchvision/prototype/transforms/functional/__init__.py
@@ -54,6 +54,7 @@
rotate_image_pil,
pad_image_tensor,
pad_image_pil,
pad_bounding_box,
crop_image_tensor,
crop_image_pil,
perspective_image_tensor,
22 changes: 21 additions & 1 deletion torchvision/prototype/transforms/functional/_geometry.py
@@ -27,7 +27,7 @@ def horizontal_flip_bounding_box(
bounding_box[:, [0, 2]] = image_size[1] - bounding_box[:, [2, 0]]

return convert_bounding_box_format(
bounding_box, old_format=features.BoundingBoxFormat.XYXY, new_format=format
bounding_box, old_format=features.BoundingBoxFormat.XYXY, new_format=format, copy=False
).view(shape)


@@ -210,6 +210,26 @@ def rotate_image_pil(
pad_image_tensor = _FT.pad
pad_image_pil = _FP.pad


def pad_bounding_box(
bounding_box: torch.Tensor, padding: List[int], format: features.BoundingBoxFormat
) -> torch.Tensor:
left, _, top, _ = _FT._parse_pad_padding(padding)

shape = bounding_box.shape

bounding_box = convert_bounding_box_format(
bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY
).view(-1, 4)

bounding_box[:, 0::2] += left
bounding_box[:, 1::2] += top

return convert_bounding_box_format(
bounding_box, old_format=features.BoundingBoxFormat.XYXY, new_format=format, copy=False
).view(shape)


crop_image_tensor = _FT.crop
crop_image_pil = _FP.crop
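To make the index arithmetic in `pad_bounding_box` concrete, a small worked sketch (values chosen for illustration): with `padding = [left, top, right, bottom]`, only `left` and `top` move a box, since XYXY coordinates are offsets from the top-left corner.

```python
import torch

box = torch.tensor([[5.0, 5.0, 30.0, 40.0]])  # one box in XYXY
left, top = 10, 20
box[:, 0::2] += left  # x1 and x2
box[:, 1::2] += top   # y1 and y2
# box is now [[15., 25., 40., 60.]]; right/bottom padding leaves it unchanged.
```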

21 changes: 16 additions & 5 deletions torchvision/prototype/transforms/functional/_meta.py
@@ -40,10 +40,13 @@ def _xyxy_to_cxcywh(xyxy: torch.Tensor) -> torch.Tensor:


def convert_bounding_box_format(
bounding_box: torch.Tensor, *, old_format: BoundingBoxFormat, new_format: BoundingBoxFormat
bounding_box: torch.Tensor, old_format: BoundingBoxFormat, new_format: BoundingBoxFormat, copy: bool = True
) -> torch.Tensor:
if new_format == old_format:
return bounding_box.clone()
if copy:
return bounding_box.clone()
else:
return bounding_box

if old_format == BoundingBoxFormat.XYWH:
bounding_box = _xywh_to_xyxy(bounding_box)
@@ -89,10 +92,13 @@ def _gray_to_rgb(grayscale: torch.Tensor) -> torch.Tensor:


def convert_image_color_space_tensor(
image: torch.Tensor, old_color_space: ColorSpace, new_color_space: ColorSpace
image: torch.Tensor, old_color_space: ColorSpace, new_color_space: ColorSpace, copy: bool = True
) -> torch.Tensor:
if new_color_space == old_color_space:
return image.clone()
if copy:
return image.clone()
else:
return image

if old_color_space == ColorSpace.OTHER or new_color_space == ColorSpace.OTHER:
raise RuntimeError(f"Conversion to or from {ColorSpace.OTHER} is not supported.")
@@ -135,11 +141,16 @@ def convert_image_color_space_tensor(
}


def convert_image_color_space_pil(image: PIL.Image.Image, color_space: ColorSpace) -> PIL.Image.Image:
def convert_image_color_space_pil(
image: PIL.Image.Image, color_space: ColorSpace, copy: bool = True
) -> PIL.Image.Image:
old_mode = image.mode
try:
new_mode = _COLOR_SPACE_TO_PIL_MODE[color_space]
except KeyError:
raise ValueError(f"Conversion from {ColorSpace.from_pil_mode(old_mode)} to {color_space} is not supported.")

if not copy and image.mode == new_mode:
return image

return image.convert(new_mode)
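The new `copy` flag has the same contract in all three converters: when the requested format or color space already matches, `copy=True` keeps the old defensive-clone behaviour, while `copy=False` returns the input as-is and skips the allocation. A sketch of the no-op path, with an assumed tensor for illustration:

```python
import torch
from torchvision.prototype.features import BoundingBoxFormat
from torchvision.prototype.transforms.functional import convert_bounding_box_format

box = torch.tensor([[0.0, 0.0, 10.0, 10.0]])
out = convert_bounding_box_format(
    box, old_format=BoundingBoxFormat.XYXY, new_format=BoundingBoxFormat.XYXY, copy=False
)
assert out is box  # no clone when nothing needs converting
```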
38 changes: 21 additions & 17 deletions torchvision/transforms/functional_tensor.py
@@ -350,6 +350,26 @@ def _pad_symmetric(img: Tensor, padding: List[int]) -> Tensor:
raise RuntimeError("Symmetric padding of N-D tensors are not supported yet")


def _parse_pad_padding(padding: List[int]) -> List[int]:
if isinstance(padding, int):
if torch.jit.is_scripting():
        # This may be unreachable
raise ValueError("padding can't be an int while torchscripting, set it as a list [value, ]")
pad_left = pad_right = pad_top = pad_bottom = padding
elif len(padding) == 1:
pad_left = pad_right = pad_top = pad_bottom = padding[0]
elif len(padding) == 2:
pad_left = pad_right = padding[0]
pad_top = pad_bottom = padding[1]
else:
pad_left = padding[0]
pad_top = padding[1]
pad_right = padding[2]
pad_bottom = padding[3]

return [pad_left, pad_right, pad_top, pad_bottom]


def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "constant") -> Tensor:
_assert_image_tensor(img)

@@ -369,23 +389,7 @@ def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "constant") -> Tensor:
if padding_mode not in ["constant", "edge", "reflect", "symmetric"]:
raise ValueError("Padding mode should be either constant, edge, reflect or symmetric")

if isinstance(padding, int):
if torch.jit.is_scripting():
# This maybe unreachable
raise ValueError("padding can't be an int while torchscripting, set it as a list [value, ]")
pad_left = pad_right = pad_top = pad_bottom = padding
elif len(padding) == 1:
pad_left = pad_right = pad_top = pad_bottom = padding[0]
elif len(padding) == 2:
pad_left = pad_right = padding[0]
pad_top = pad_bottom = padding[1]
else:
pad_left = padding[0]
pad_top = padding[1]
pad_right = padding[2]
pad_bottom = padding[3]

p = [pad_left, pad_right, pad_top, pad_bottom]
p = _parse_pad_padding(padding)

if padding_mode == "edge":
# remap padding_mode str
Expand Down