Skip to content

Unified inputs for F.rotate #2495

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Aug 5, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 49 additions & 8 deletions test/test_functional_tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,7 @@ def test_affine(self):
)
# 3) Test translation
test_configs = [
[10, 12], (12, 13)
[10, 12], (-12, -13)
]
for t in test_configs:
for fn in [F.affine, scripted_affine]:
Expand All @@ -447,21 +447,21 @@ def test_affine(self):
test_configs = [
(45, [5, 6], 1.0, [0.0, 0.0]),
(33, (5, -4), 1.0, [0.0, 0.0]),
(45, [5, 4], 1.2, [0.0, 0.0]),
(33, (4, 8), 2.0, [0.0, 0.0]),
(45, [-5, 4], 1.2, [0.0, 0.0]),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a particular reason why we are changing those values?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The tests were missing negative translation values, so I added only those.

(33, (-4, -8), 2.0, [0.0, 0.0]),
(85, (10, -10), 0.7, [0.0, 0.0]),
(0, [0, 0], 1.0, [35.0, ]),
(25, [0, 0], 1.2, [0.0, 15.0]),
(45, [10, 0], 0.7, [2.0, 5.0]),
(45, [10, -10], 1.2, [4.0, 5.0]),
(45, [-10, 0], 0.7, [2.0, 5.0]),
(45, [-10, -10], 1.2, [4.0, 5.0]),
]
for r in [0, ]:
for a, t, s, sh in test_configs:
out_pil_img = F.affine(pil_img, angle=a, translate=t, scale=s, shear=sh, resample=r)
out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1)))

for fn in [F.affine, scripted_affine]:
out_tensor = fn(tensor, angle=a, translate=t, scale=s, shear=sh, resample=r)
out_pil_img = F.affine(pil_img, angle=a, translate=t, scale=s, shear=sh, resample=r)
out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1)))

num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0
ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2]
# Tolerance : less than 5% of different pixels
Expand All @@ -473,6 +473,47 @@ def test_affine(self):
)
)

def test_rotate(self):
    """Check that tensor ``F.rotate`` agrees with the PIL implementation.

    For every combination of angle, ``expand`` flag and rotation center, the
    PIL image is rotated once to obtain a reference, then the tensor input is
    rotated with both the eager and the scripted ``F.rotate``. The outputs
    must have the same shape as the reference and differ in fewer than 2% of
    the pixels.
    """
    # Tests on square image
    tensor, pil_img = self._create_data(26, 26)
    scripted_rotate = torch.jit.script(F.rotate)

    # PIL exposes size as (width, height) and rotation centers are (x, y),
    # so x is scaled by the width and y by the height. Using the width for
    # both only works by accident on square images.
    width, height = pil_img.size

    centers = [
        None,
        (int(width * 0.3), int(height * 0.4)),
        [int(width * 0.5), int(height * 0.6)],
    ]

    for r in [0, ]:
        for a in range(-120, 120, 23):
            for e in [True, False]:
                for c in centers:
                    # The PIL reference does not depend on which tensor
                    # implementation is under test, so compute it once.
                    out_pil_img = F.rotate(pil_img, angle=a, resample=r, expand=e, center=c)
                    out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1)))

                    for fn in [F.rotate, scripted_rotate]:
                        out_tensor = fn(tensor, angle=a, resample=r, expand=e, center=c)

                        self.assertEqual(
                            out_tensor.shape,
                            out_pil_tensor.shape,
                            msg="{}: {} vs {}".format(
                                (r, a, e, c), out_tensor.shape, out_pil_tensor.shape
                            )
                        )
                        # Mismatches are counted per element and divided by 3
                        # (presumably the number of channels of the test image).
                        num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0
                        ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2]
                        # Tolerance : less than 2% of different pixels
                        self.assertLess(
                            ratio_diff_pixels,
                            0.02,
                            msg="{}: {}\n{} vs \n{}".format(
                                (r, a, e, c), ratio_diff_pixels, out_tensor[0, :7, :7], out_pil_tensor[0, :7, :7]
                            )
                        )


if __name__ == '__main__':
unittest.main()
2 changes: 1 addition & 1 deletion test/test_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -1266,7 +1266,7 @@ def test_rotate(self):
x = np.zeros((100, 100, 3), dtype=np.uint8)
x[40, 40] = [255, 255, 255]

with self.assertRaises(TypeError):
with self.assertRaisesRegex(TypeError, r"img should be PIL Image"):
F.rotate(x, 10)

img = F.to_pil_image(x)
Expand Down
88 changes: 53 additions & 35 deletions torchvision/transforms/functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -756,40 +756,8 @@ def adjust_gamma(img: Tensor, gamma: float, gain: float = 1) -> Tensor:
return F_t.adjust_gamma(img, gamma, gain)


def rotate(img, angle, resample=False, expand=False, center=None, fill=None):
    """Rotate a PIL image by the given angle.

    Args:
        img (PIL Image): PIL Image to be rotated.
        angle (float or int): Rotation angle in degrees, counter-clockwise.
        resample (``PIL.Image.NEAREST`` or ``PIL.Image.BILINEAR`` or ``PIL.Image.BICUBIC``, optional):
            An optional resampling filter. See `filters`_ for more information.
            If omitted, or if the image has mode "1" or "P", it is set to ``PIL.Image.NEAREST``.
        expand (bool, optional): Optional expansion flag.
            If true, expands the output image to make it large enough to hold the entire rotated image.
            If false or omitted, make the output image the same size as the input image.
            Note that the expand flag assumes rotation around the center and no translation.
        center (2-tuple, optional): Optional center of rotation.
            Origin is the upper left corner.
            Default is the center of the image.
        fill (n-tuple or int or float): Pixel fill value for area outside the rotated
            image. If int or float, the value is used for all bands respectively.
            Defaults to 0 for all bands. This option is only available for ``pillow>=5.2.0``.

    Returns:
        The value returned by ``img.rotate``.

    Raises:
        TypeError: If ``img`` is not a PIL Image.

    .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters

    """
    if F_pil._is_pil_image(img):
        # _parse_fill yields the extra keyword arguments forwarded to
        # Image.rotate (presumably gated on the given minimum Pillow version).
        fill_kwargs = _parse_fill(fill, img, '5.2.0')
        return img.rotate(angle, resample, expand, center, **fill_kwargs)

    raise TypeError('img should be PIL Image. Got {}'.format(type(img)))


def _get_inverse_affine_matrix(
center: List[int], angle: float, translate: List[float], scale: float, shear: List[float]
center: List[float], angle: float, translate: List[float], scale: float, shear: List[float]
) -> List[float]:
# Helper method to compute inverse matrix for affine transformation

Expand Down Expand Up @@ -838,6 +806,56 @@ def _get_inverse_affine_matrix(
return matrix


def rotate(
        img: Tensor, angle: float, resample: int = 0, expand: bool = False,
        center: Optional[List[int]] = None, fill: Optional[int] = None
) -> Tensor:
    """Rotate an image by ``angle`` degrees.

    PIL inputs are delegated to ``F_pil.rotate``. Tensor inputs are expected
    to have [..., H, W] shape, where ... means an arbitrary number of leading
    dimensions, and are rotated through ``F_t.rotate`` using an inverse affine
    matrix.

    Args:
        img (PIL Image or Tensor): image to be rotated.
        angle (float or int): rotation angle value in degrees, counter-clockwise.
        resample (``PIL.Image.NEAREST`` or ``PIL.Image.BILINEAR`` or ``PIL.Image.BICUBIC``, optional):
            An optional resampling filter. See `filters`_ for more information.
            If omitted, or if the image has mode "1" or "P", it is set to ``PIL.Image.NEAREST``.
        expand (bool, optional): Optional expansion flag.
            If true, expands the output image to make it large enough to hold the entire rotated image.
            If false or omitted, make the output image the same size as the input image.
            Note that the expand flag assumes rotation around the center and no translation.
        center (list or tuple, optional): Optional center of rotation. Origin is the upper left corner.
            Default is the center of the image.
        fill (n-tuple or int or float): Pixel fill value for area outside the rotated
            image. If int or float, the value is used for all bands respectively.
            Defaults to 0 for all bands. This option is only available for ``pillow>=5.2.0``.

    Returns:
        PIL Image or Tensor: Rotated image.

    Raises:
        TypeError: If ``angle`` is not an int or float, or if ``center`` is
            given but is neither a list nor a tuple.

    .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters

    """
    if not isinstance(angle, (int, float)):
        raise TypeError("Argument angle should be int or float")

    if center is not None and not isinstance(center, (list, tuple)):
        raise TypeError("Argument center should be a sequence")

    # Anything that is not a tensor is handed to the PIL backend unchanged.
    if not isinstance(img, torch.Tensor):
        return F_pil.rotate(img, angle=angle, resample=resample, expand=expand, center=center, fill=fill)

    if center is None:
        # No explicit center: use the origin of the normalized coordinates.
        normalized_center = [0.0, 0.0]
    else:
        img_size = _get_image_size(img)
        # Map each pixel coordinate of the center into the [-1, +1] range.
        normalized_center = [2.0 * coord / extent - 1.0 for extent, coord in zip(img_size, center)]

    # The affine helper and rotate currently disagree on the direction of the
    # rotation angle, so the angle is negated before building the matrix.
    inverse_matrix = _get_inverse_affine_matrix(normalized_center, -angle, [0.0, 0.0], 1.0, [0.0, 0.0])
    return F_t.rotate(img, matrix=inverse_matrix, resample=resample, expand=expand, fill=fill)


def affine(
img: Tensor, angle: float, translate: List[int], scale: float, shear: List[float],
resample: int = 0, fillcolor: Optional[int] = None
Expand All @@ -847,7 +865,7 @@ def affine(
to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.

Args:
img (PIL Image or Tensor): image to be rotated.
img (PIL Image or Tensor): image to transform.
angle (float or int): rotation angle in degrees between -180 and 180, clockwise direction.
translate (list or tuple of integers): horizontal and vertical translations (post-rotation translation)
scale (float): overall scale
Expand Down Expand Up @@ -911,7 +929,7 @@ def affine(
# we need to rescale translate by image size / 2 as its values can be between -1 and 1
translate = [2.0 * t / s for s, t in zip(img_size, translate)]

matrix = _get_inverse_affine_matrix([0, 0], angle, translate, scale, shear)
matrix = _get_inverse_affine_matrix([0.0, 0.0], angle, translate, scale, shear)
return F_t.affine(img, matrix=matrix, resample=resample, fillcolor=fillcolor)


Expand Down
34 changes: 34 additions & 0 deletions torchvision/transforms/functional_pil.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,3 +422,37 @@ def affine(img, matrix, resample=0, fillcolor=None):
output_size = img.size
opts = _parse_fill(fillcolor, img, '5.0.0')
return img.transform(output_size, Image.AFFINE, matrix, resample, **opts)


@torch.jit.unused
def rotate(img, angle, resample=0, expand=False, center=None, fill=None):
    """Rotate a PIL image by the given angle.

    Args:
        img (PIL Image): image to be rotated.
        angle (float or int): rotation angle value in degrees, counter-clockwise.
        resample (``PIL.Image.NEAREST`` or ``PIL.Image.BILINEAR`` or ``PIL.Image.BICUBIC``, optional):
            An optional resampling filter. See `filters`_ for more information.
            If omitted, or if the image has mode "1" or "P", it is set to ``PIL.Image.NEAREST``.
        expand (bool, optional): Optional expansion flag.
            If true, expands the output image to make it large enough to hold the entire rotated image.
            If false or omitted, make the output image the same size as the input image.
            Note that the expand flag assumes rotation around the center and no translation.
        center (2-tuple, optional): Optional center of rotation.
            Origin is the upper left corner.
            Default is the center of the image.
        fill (n-tuple or int or float): Pixel fill value for area outside the rotated
            image. If int or float, the value is used for all bands respectively.
            Defaults to 0 for all bands. This option is only available for ``pillow>=5.2.0``.

    Returns:
        PIL Image: Rotated image.

    Raises:
        TypeError: If ``img`` is not a PIL Image.

    .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters

    """
    if _is_pil_image(img):
        # _parse_fill yields the extra keyword arguments forwarded to
        # Image.rotate (presumably gated on the given minimum Pillow version).
        fill_kwargs = _parse_fill(fill, img, '5.2.0')
        return img.rotate(angle, resample, expand, center, **fill_kwargs)

    raise TypeError("img should be PIL Image. Got {}".format(type(img)))
Loading