From bd8592a4c956747549dbae03d30782378f1ecdca Mon Sep 17 00:00:00 2001 From: Hayley Song Date: Tue, 12 Nov 2019 01:31:09 -0800 Subject: [PATCH] fixes rotation fill option for Pillow Image of L mode --- torchvision/transforms/functional.py | 136 +++++++++++---------------- torchvision/transforms/transforms.py | 4 +- 2 files changed, 59 insertions(+), 81 deletions(-) diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py index 8ae75f84c5b..eebc0ea7c77 100644 --- a/torchvision/transforms/functional.py +++ b/torchvision/transforms/functional.py @@ -2,13 +2,12 @@ import torch import sys import math -from PIL import Image, ImageOps, ImageEnhance, __version__ as PILLOW_VERSION +from PIL import Image, ImageOps, ImageEnhance, PILLOW_VERSION try: import accimage except ImportError: accimage = None import numpy as np -from numpy import sin, cos, tan import numbers import collections import warnings @@ -350,53 +349,46 @@ def pad(img, padding, fill=0, padding_mode='constant'): return Image.fromarray(img) -def crop(img, top, left, height, width): +def crop(img, i, j, h, w): """Crop the given PIL Image. + Args: - img (PIL Image): Image to be cropped. (0,0) denotes the top left corner of the image. - top (int): Vertical component of the top left corner of the crop box. - left (int): Horizontal component of the top left corner of the crop box. - height (int): Height of the crop box. - width (int): Width of the crop box. + img (PIL Image): Image to be cropped. + i (int): i in (i,j) i.e coordinates of the upper left corner. + j (int): j in (i,j) i.e coordinates of the upper left corner. + h (int): Height of the cropped image. + w (int): Width of the cropped image. + Returns: PIL Image: Cropped image. """ if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) - return img.crop((left, top, left + width, top + height)) + return img.crop((j, i, j + w, i + h)) def center_crop(img, output_size): - """Crop the given PIL Image and resize it to desired size. - - Args: - img (PIL Image): Image to be cropped. (0,0) denotes the top left corner of the image. - output_size (sequence or int): (height, width) of the crop box. If int, - it is used for both directions - Returns: - PIL Image: Cropped image. - """ if isinstance(output_size, numbers.Number): output_size = (int(output_size), int(output_size)) - image_width, image_height = img.size - crop_height, crop_width = output_size - crop_top = int(round((image_height - crop_height) / 2.)) - crop_left = int(round((image_width - crop_width) / 2.)) - return crop(img, crop_top, crop_left, crop_height, crop_width) + w, h = img.size + th, tw = output_size + i = int(round((h - th) / 2.)) + j = int(round((w - tw) / 2.)) + return crop(img, i, j, th, tw) -def resized_crop(img, top, left, height, width, size, interpolation=Image.BILINEAR): +def resized_crop(img, i, j, h, w, size, interpolation=Image.BILINEAR): """Crop the given PIL Image and resize it to desired size. Notably used in :class:`~torchvision.transforms.RandomResizedCrop`. Args: - img (PIL Image): Image to be cropped. (0,0) denotes the top left corner of the image. - top (int): Vertical component of the top left corner of the crop box. - left (int): Horizontal component of the top left corner of the crop box. - height (int): Height of the crop box. - width (int): Width of the crop box. + img (PIL Image): Image to be cropped. + i (int): i in (i,j) i.e coordinates of the upper left corner + j (int): j in (i,j) i.e coordinates of the upper left corner + h (int): Height of the cropped image. + w (int): Width of the cropped image. size (sequence or int): Desired output size. Same semantics as ``resize``. interpolation (int, optional): Desired interpolation. Default is ``PIL.Image.BILINEAR``. @@ -404,7 +396,7 @@ def resized_crop(img, top, left, height, width, size, interpolation=Image.BILINE PIL Image: Cropped image. """ assert _is_pil_image(img), 'img should be PIL Image' - img = crop(img, top, left, height, width) + img = crop(img, i, j, h, w) img = resize(img, size, interpolation) return img @@ -503,18 +495,16 @@ def five_crop(img, size): else: assert len(size) == 2, "Please provide only two dimensions (h, w) for size." - image_width, image_height = img.size - crop_height, crop_width = size - if crop_width > image_width or crop_height > image_height: - msg = "Requested crop size {} is bigger than input size {}" - raise ValueError(msg.format(size, (image_height, image_width))) - - tl = img.crop((0, 0, crop_width, crop_height)) - tr = img.crop((image_width - crop_width, 0, image_width, crop_height)) - bl = img.crop((0, image_height - crop_height, crop_width, image_height)) - br = img.crop((image_width - crop_width, image_height - crop_height, - image_width, image_height)) - center = center_crop(img, (crop_height, crop_width)) + w, h = img.size + crop_h, crop_w = size + if crop_w > w or crop_h > h: + raise ValueError("Requested crop size {} is bigger than input size {}".format(size, + (h, w))) + tl = img.crop((0, 0, crop_w, crop_h)) + tr = img.crop((w - crop_w, 0, w, crop_h)) + bl = img.crop((0, h - crop_h, crop_w, h)) + br = img.crop((w - crop_w, h - crop_h, w, h)) + center = center_crop(img, (crop_h, crop_w)) return (tl, tr, bl, br, center) @@ -714,7 +704,7 @@ def rotate(img, angle, resample=False, expand=False, center=None, fill=0): Origin is the upper left corner. Default is the center of the image. fill (3-tuple or int): RGB pixel fill value for area outside the rotated image. - If int, it is used for all channels respectively. + If int, it is used for all channels respectively. .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters @@ -723,9 +713,9 @@ def rotate(img, angle, resample=False, expand=False, center=None, fill=0): if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) - if isinstance(fill, int): + if isinstance(fill, int) and img.mode != 'L': fill = tuple([fill] * 3) - + return img.rotate(angle, resample, expand, center, fillcolor=fill) @@ -737,52 +727,40 @@ def _get_inverse_affine_matrix(center, angle, translate, scale, shear): # where T is translation matrix: [1, 0, tx | 0, 1, ty | 0, 0, 1] # C is translation matrix to keep center: [1, 0, cx | 0, 1, cy | 0, 0, 1] # RSS is rotation with scale and shear matrix - # RSS(a, s, (sx, sy)) = - # = R(a) * S(s) * SHy(sy) * SHx(sx) - # = [ s*cos(a - sy)/cos(sy), s*(-cos(a - sy)*tan(x)/cos(y) - sin(a)), 0 ] - # [ s*sin(a + sy)/cos(sy), s*(-sin(a - sy)*tan(x)/cos(y) + cos(a)), 0 ] - # [ 0 , 0 , 1 ] - # - # where R is a rotation matrix, S is a scaling matrix, and SHx and SHy are the shears: - # SHx(s) = [1, -tan(s)] and SHy(s) = [1 , 0] - # [0, 1 ] [-tan(s), 1] - # + # RSS(a, scale, shear) = [ cos(a + shear_y)*scale -sin(a + shear_x)*scale 0] + # [ sin(a + shear_y)*scale cos(a + shear_x)*scale 0] + # [ 0 0 1] # Thus, the inverse is M^-1 = C * RSS^-1 * C^-1 * T^-1 - if isinstance(shear, numbers.Number): + angle = math.radians(angle) + if isinstance(shear, (tuple, list)) and len(shear) == 2: + shear = [math.radians(s) for s in shear] + elif isinstance(shear, numbers.Number): + shear = math.radians(shear) shear = [shear, 0] - - if not isinstance(shear, (tuple, list)) and len(shear) == 2: + else: raise ValueError( "Shear should be a single value or a tuple/list containing " + "two values. Got {}".format(shear)) - - rot = math.radians(angle) - sx, sy = [math.radians(s) for s in shear] - - cx, cy = center - tx, ty = translate - - # RSS without scaling - a = cos(rot - sy) / cos(sy) - b = -cos(rot - sy) * tan(sx) / cos(sy) - sin(rot) - c = sin(rot - sy) / cos(sy) - d = -sin(rot - sy) * tan(sx) / cos(sy) + cos(rot) + scale = 1.0 / scale # Inverted rotation matrix with scale and shear - # det([[a, b], [c, d]]) == 1, since det(rotation) = 1 and det(shear) = 1 - M = [d, -b, 0, - -c, a, 0] - M = [x / scale for x in M] + d = math.cos(angle + shear[0]) * math.cos(angle + shear[1]) + \ + math.sin(angle + shear[0]) * math.sin(angle + shear[1]) + matrix = [ + math.cos(angle + shear[0]), math.sin(angle + shear[0]), 0, + -math.sin(angle + shear[1]), math.cos(angle + shear[1]), 0 + ] + matrix = [scale / d * m for m in matrix] # Apply inverse of translation and of center translation: RSS^-1 * C^-1 * T^-1 - M[2] += M[0] * (-cx - tx) + M[1] * (-cy - ty) - M[5] += M[3] * (-cx - tx) + M[4] * (-cy - ty) + matrix[2] += matrix[0] * (-center[0] - translate[0]) + matrix[1] * (-center[1] - translate[1]) + matrix[5] += matrix[3] * (-center[0] - translate[0]) + matrix[4] * (-center[1] - translate[1]) # Apply center translation: C * RSS^-1 * C^-1 * T^-1 - M[2] += cx - M[5] += cy - return M + matrix[2] += center[0] + matrix[5] += center[1] + return matrix def affine(img, angle, translate, scale, shear, resample=0, fillcolor=None): diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py index 3ec84aae84c..e8083654518 100644 --- a/torchvision/transforms/transforms.py +++ b/torchvision/transforms/transforms.py @@ -957,7 +957,7 @@ class RandomRotation(object): Origin is the upper left corner. Default is the center of the image. fill (3-tuple or int): RGB pixel fill value for area outside the rotated image. - If int, it is used for all channels respectively. + If int, it is used for all channels respectively. .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters @@ -999,7 +999,7 @@ def __call__(self, img): """ angle = self.get_params(self.degrees) - + return F.rotate(img, angle, self.resample, self.expand, self.center, self.fill) def __repr__(self):