
port RandomZoomOut from detection references to prototype transforms #5551


Merged · 11 commits · Mar 14, 2022
2 changes: 1 addition & 1 deletion torchvision/prototype/features/_bounding_box.py
@@ -64,7 +64,7 @@ def to_format(self, format: Union[str, BoundingBoxFormat]) -> BoundingBox:
        from torchvision.prototype.transforms.functional import convert_bounding_box_format

        if isinstance(format, str):
-            format = BoundingBoxFormat[format]
+            format = BoundingBoxFormat.from_str(format.upper())

        return BoundingBox.new_like(
            self, convert_bounding_box_format(self, old_format=self.format, new_format=format), format=format
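Note: the switch to from_str makes string lookups case-insensitive, since plain enum indexing only accepts the exact member name. A minimal sketch of the intended behaviour (assuming BoundingBoxFormat is re-exported from torchvision.prototype.features):

    from torchvision.prototype.features import BoundingBoxFormat

    # BoundingBoxFormat["xyxy"] raises KeyError; upper-casing first makes both spellings work
    fmt = BoundingBoxFormat.from_str("xyxy".upper())
    assert fmt is BoundingBoxFormat.XYXY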
11 changes: 10 additions & 1 deletion torchvision/prototype/transforms/__init__.py
@@ -7,7 +7,16 @@
from ._augment import RandomErasing, RandomMixup, RandomCutmix
from ._auto_augment import RandAugment, TrivialAugmentWide, AutoAugment, AugMix
from ._container import Compose, RandomApply, RandomChoice, RandomOrder
from ._geometry import HorizontalFlip, Resize, CenterCrop, RandomResizedCrop, FiveCrop, TenCrop, BatchMultiCrop
from ._geometry import (
HorizontalFlip,
Resize,
CenterCrop,
RandomResizedCrop,
FiveCrop,
TenCrop,
BatchMultiCrop,
RandomZoomOut,
)
from ._meta import ConvertBoundingBoxFormat, ConvertImageDtype, ConvertImageColorSpace
from ._misc import Identity, Normalize, ToDtype, Lambda
from ._presets import (
85 changes: 85 additions & 0 deletions torchvision/prototype/transforms/_geometry.py
@@ -256,3 +256,88 @@ def apply_recursively(obj: Any) -> Any:
                return obj

        return apply_recursively(inputs if len(inputs) > 1 else inputs[0])


class RandomZoomOut(Transform):
    def __init__(
        self, fill: Union[float, Sequence[float]] = 0.0, side_range: Tuple[float, float] = (1.0, 4.0), p: float = 0.5
    ) -> None:
        super().__init__()

        if fill is None:
            fill = 0.0
        self.fill = fill

        self.side_range = side_range
        if side_range[0] < 1.0 or side_range[0] > side_range[1]:
            raise ValueError(f"Invalid canvas side range provided {side_range}.")

        self.p = p

    def _get_params(self, sample: Any) -> Dict[str, Any]:
        image = query_image(sample)
        orig_c, orig_h, orig_w = get_image_dimensions(image)

        r = self.side_range[0] + torch.rand(1) * (self.side_range[1] - self.side_range[0])
        canvas_width = int(orig_w * r)
        canvas_height = int(orig_h * r)

        r = torch.rand(2)
        left = int((canvas_width - orig_w) * r[0])
        top = int((canvas_height - orig_h) * r[1])
        right = canvas_width - (left + orig_w)
        bottom = canvas_height - (top + orig_h)
        padding = [left, top, right, bottom]

        fill = self.fill
        if not isinstance(fill, collections.abc.Sequence):
            fill = [fill] * orig_c

        return dict(padding=padding, fill=fill)

    def _transform(self, input: Any, params: Dict[str, Any]) -> Any:
        if isinstance(input, features.Image) or is_simple_tensor(input):
            # The tensor pad kernel supports only a scalar fill, not a per-channel colour,
            # so pad with zeros first and then overwrite the padded border.
            output = F.pad_image_tensor(input, params["padding"], fill=0, padding_mode="constant")

            left, top, right, bottom = params["padding"]
            fill = torch.tensor(params["fill"], dtype=input.dtype, device=input.device).view(-1, 1, 1)

            if top > 0:
                output[..., :top, :] = fill
            if left > 0:
                output[..., :, :left] = fill
            if bottom > 0:
                output[..., -bottom:, :] = fill
            if right > 0:
                output[..., :, -right:] = fill

            if isinstance(input, features.Image):
                output = features.Image.new_like(input, output)

            return output
        elif isinstance(input, PIL.Image.Image):
            return F.pad_image_pil(
                input,
                params["padding"],
                fill=tuple(int(v) if input.mode != "F" else v for v in params["fill"]),
                padding_mode="constant",
            )
        elif isinstance(input, features.BoundingBox):
            output = F.pad_bounding_box(input, params["padding"], format=input.format)

            left, top, right, bottom = params["padding"]
            height, width = input.image_size
            height += top + bottom
            width += left + right

            return features.BoundingBox.new_like(input, output, image_size=(height, width))
        else:
            return input

    def forward(self, *inputs: Any) -> Any:
        sample = inputs if len(inputs) > 1 else inputs[0]
        if torch.rand(1) >= self.p:
            return sample

        return super().forward(sample)
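
For reviewers, a hedged usage sketch of the new transform; the tensor input and sizes below are illustrative assumptions, not part of this PR:

    import torch
    from torchvision.prototype import transforms

    # Place the image on a larger canvas (each side scaled by r drawn from side_range),
    # filling the new border with `fill`; the zoom-out is applied with probability p.
    transform = transforms.RandomZoomOut(fill=0.0, side_range=(1.0, 4.0), p=0.5)

    image = torch.randint(0, 256, (3, 224, 224), dtype=torch.uint8)
    zoomed = transform(image)  # the original image, or one padded to roughly (3, r * 224, r * 224)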
1 change: 1 addition & 0 deletions torchvision/prototype/transforms/functional/__init__.py
@@ -54,6 +54,7 @@
    rotate_image_pil,
    pad_image_tensor,
    pad_image_pil,
+    pad_bounding_box,
    crop_image_tensor,
    crop_image_pil,
    perspective_image_tensor,
22 changes: 21 additions & 1 deletion torchvision/prototype/transforms/functional/_geometry.py
@@ -27,7 +27,7 @@ def horizontal_flip_bounding_box(
    bounding_box[:, [0, 2]] = image_size[1] - bounding_box[:, [2, 0]]

    return convert_bounding_box_format(
-        bounding_box, old_format=features.BoundingBoxFormat.XYXY, new_format=format
+        bounding_box, old_format=features.BoundingBoxFormat.XYXY, new_format=format, copy=False
    ).view(shape)


@@ -210,6 +210,26 @@ def rotate_image_pil(
pad_image_tensor = _FT.pad
pad_image_pil = _FP.pad


def pad_bounding_box(
    bounding_box: torch.Tensor, padding: List[int], format: features.BoundingBoxFormat
) -> torch.Tensor:
    left, _, top, _ = _FT._parse_pad_padding(padding)

    shape = bounding_box.shape

    bounding_box = convert_bounding_box_format(
        bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY
    ).view(-1, 4)

    bounding_box[:, 0::2] += left
    bounding_box[:, 1::2] += top

    return convert_bounding_box_format(
        bounding_box, old_format=features.BoundingBoxFormat.XYXY, new_format=format, copy=False
    ).view(shape)


crop_image_tensor = _FT.crop
crop_image_pil = _FP.crop

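A small worked example of the new kernel (values chosen for illustration): padding is interpreted as [left, top, right, bottom], and only the left/top components shift the box coordinates:

    import torch
    from torchvision.prototype import features
    from torchvision.prototype.transforms.functional import pad_bounding_box

    box = torch.tensor([[10.0, 20.0, 50.0, 60.0]])  # a single box in XYXY format
    out = pad_bounding_box(box, padding=[5, 7, 0, 0], format=features.BoundingBoxFormat.XYXY)
    # tensor([[15., 27., 55., 67.]]): x coordinates shifted by left=5, y by top=7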
21 changes: 16 additions & 5 deletions torchvision/prototype/transforms/functional/_meta.py
@@ -40,10 +40,13 @@ def _xyxy_to_cxcywh(xyxy: torch.Tensor) -> torch.Tensor:


def convert_bounding_box_format(
-    bounding_box: torch.Tensor, *, old_format: BoundingBoxFormat, new_format: BoundingBoxFormat
+    bounding_box: torch.Tensor, old_format: BoundingBoxFormat, new_format: BoundingBoxFormat, copy: bool = True
) -> torch.Tensor:
    if new_format == old_format:
-        return bounding_box.clone()
+        if copy:
+            return bounding_box.clone()
+        else:
+            return bounding_box

    if old_format == BoundingBoxFormat.XYWH:
        bounding_box = _xywh_to_xyxy(bounding_box)
@@ -89,10 +92,13 @@ def _gray_to_rgb(grayscale: torch.Tensor) -> torch.Tensor:


def convert_image_color_space_tensor(
-    image: torch.Tensor, old_color_space: ColorSpace, new_color_space: ColorSpace
+    image: torch.Tensor, old_color_space: ColorSpace, new_color_space: ColorSpace, copy: bool = True
) -> torch.Tensor:
    if new_color_space == old_color_space:
-        return image.clone()
+        if copy:
+            return image.clone()
+        else:
+            return image

    if old_color_space == ColorSpace.OTHER or new_color_space == ColorSpace.OTHER:
        raise RuntimeError(f"Conversion to or from {ColorSpace.OTHER} is not supported.")
@@ -135,11 +141,16 @@ def convert_image_color_space_tensor(
}


-def convert_image_color_space_pil(image: PIL.Image.Image, color_space: ColorSpace) -> PIL.Image.Image:
+def convert_image_color_space_pil(
+    image: PIL.Image.Image, color_space: ColorSpace, copy: bool = True
+) -> PIL.Image.Image:
    old_mode = image.mode
    try:
        new_mode = _COLOR_SPACE_TO_PIL_MODE[color_space]
    except KeyError:
        raise ValueError(f"Conversion from {ColorSpace.from_pil_mode(old_mode)} to {color_space} is not supported.")

+    if not copy and image.mode == new_mode:
+        return image
+
    return image.convert(new_mode)
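
Note: the new copy flag only affects the no-op path. When the source and target formats (or color spaces) already match, copy=False returns the input as-is instead of cloning it, which lets internal callers such as pad_bounding_box above skip an extra allocation. A minimal sketch:

    import torch
    from torchvision.prototype.features import BoundingBoxFormat
    from torchvision.prototype.transforms.functional import convert_bounding_box_format

    box = torch.tensor([[0.0, 0.0, 10.0, 10.0]])
    out = convert_bounding_box_format(box, BoundingBoxFormat.XYXY, BoundingBoxFormat.XYXY, copy=False)
    assert out is box  # no clone on the no-op path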
38 changes: 21 additions & 17 deletions torchvision/transforms/functional_tensor.py
@@ -350,6 +350,26 @@ def _pad_symmetric(img: Tensor, padding: List[int]) -> Tensor:
        raise RuntimeError("Symmetric padding of N-D tensors are not supported yet")


def _parse_pad_padding(padding: List[int]) -> List[int]:
    if isinstance(padding, int):
        if torch.jit.is_scripting():
            # This may be unreachable
            raise ValueError("padding can't be an int while torchscripting, set it as a list [value, ]")
        pad_left = pad_right = pad_top = pad_bottom = padding
    elif len(padding) == 1:
        pad_left = pad_right = pad_top = pad_bottom = padding[0]
    elif len(padding) == 2:
        pad_left = pad_right = padding[0]
        pad_top = pad_bottom = padding[1]
    else:
        pad_left = padding[0]
        pad_top = padding[1]
        pad_right = padding[2]
        pad_bottom = padding[3]

    return [pad_left, pad_right, pad_top, pad_bottom]


def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "constant") -> Tensor:
    _assert_image_tensor(img)

@@ -369,23 +389,7 @@ def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "constant") -> Tensor:
    if padding_mode not in ["constant", "edge", "reflect", "symmetric"]:
        raise ValueError("Padding mode should be either constant, edge, reflect or symmetric")

-    if isinstance(padding, int):
-        if torch.jit.is_scripting():
-            # This maybe unreachable
-            raise ValueError("padding can't be an int while torchscripting, set it as a list [value, ]")
-        pad_left = pad_right = pad_top = pad_bottom = padding
-    elif len(padding) == 1:
-        pad_left = pad_right = pad_top = pad_bottom = padding[0]
-    elif len(padding) == 2:
-        pad_left = pad_right = padding[0]
-        pad_top = pad_bottom = padding[1]
-    else:
-        pad_left = padding[0]
-        pad_top = padding[1]
-        pad_right = padding[2]
-        pad_bottom = padding[3]
-
-    p = [pad_left, pad_right, pad_top, pad_bottom]
+    p = _parse_pad_padding(padding)

    if padding_mode == "edge":
        # remap padding_mode str
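
A quick sketch of what the factored-out helper normalizes (a private helper, shown for illustration only). Note the return order is [left, right, top, bottom], matching what torch.nn.functional.pad expects, not the [left, top, right, bottom] input order:

    from torchvision.transforms.functional_tensor import _parse_pad_padding

    _parse_pad_padding([2])           # [2, 2, 2, 2]
    _parse_pad_padding([2, 5])        # [2, 2, 5, 5]   (left/right=2, top/bottom=5)
    _parse_pad_padding([1, 2, 3, 4])  # [1, 3, 2, 4]   (input is [left, top, right, bottom])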