From 91260788344bb0cf30b4563d5b525ca01486a6a1 Mon Sep 17 00:00:00 2001 From: vfdev-5 Date: Mon, 13 Jul 2020 18:40:31 +0200 Subject: [PATCH] Fixes incoherence in affine transformation when center is defined as half image size + 0.5 Incoherence is when affine transformation is 90 degrees rotation and output contains a zero line --- test/test_transforms.py | 14 +++++++------- torchvision/transforms/functional.py | 8 +++++--- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/test/test_transforms.py b/test/test_transforms.py index b0eb844fcf8..d583881b472 100644 --- a/test/test_transforms.py +++ b/test/test_transforms.py @@ -1311,14 +1311,11 @@ def test_rotate_fill(self): def test_affine(self): input_img = np.zeros((40, 40, 3), dtype=np.uint8) - pts = [] cnt = [20, 20] for pt in [(16, 16), (20, 16), (20, 20)]: for i in range(-5, 5): for j in range(-5, 5): input_img[pt[0] + i, pt[1] + j, :] = [255, 155, 55] - pts.append((pt[0] + i, pt[1] + j)) - pts = list(set(pts)) with self.assertRaises(TypeError): F.affine(input_img, 10) @@ -1373,9 +1370,12 @@ def _test_transformation(a, t, s, sh): inv_true_matrix = np.linalg.inv(true_matrix) for y in range(true_result.shape[0]): for x in range(true_result.shape[1]): - res = np.dot(inv_true_matrix, [x, y, 1]) - _x = int(res[0] + 0.5) - _y = int(res[1] + 0.5) + # Same as for PIL: + # https://github.com/python-pillow/Pillow/blob/71f8ec6a0cfc1008076a023c0756542539d057ab/ + # src/libImaging/Geometry.c#L1060 + input_pt = np.array([x + 0.5, y + 0.5, 1.0]) + res = np.floor(np.dot(inv_true_matrix, input_pt)).astype(np.int) + _x, _y = res[:2] if 0 <= _x < input_img.shape[1] and 0 <= _y < input_img.shape[0]: true_result[y, x, :] = input_img[_y, _x, :] @@ -1408,7 +1408,7 @@ def _test_transformation(a, t, s, sh): # Test rotation, scale, translation, shear for a in range(-90, 90, 25): for t1 in range(-10, 10, 5): - for s in [0.75, 0.98, 1.0, 1.1, 1.2]: + for s in [0.75, 0.98, 1.0, 1.2, 1.4]: for sh in range(-15, 15, 5): _test_transformation(a=a, t=(t1, t1), s=s, sh=(sh, sh)) diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py index 801df42a187..35cd222acd9 100644 --- a/torchvision/transforms/functional.py +++ b/torchvision/transforms/functional.py @@ -1,12 +1,11 @@ import math import numbers import warnings -from collections.abc import Iterable from typing import Any import numpy as np from numpy import sin, cos, tan -from PIL import Image, ImageOps, ImageEnhance, __version__ as PILLOW_VERSION +from PIL import Image, __version__ as PILLOW_VERSION import torch from torch import Tensor @@ -910,7 +909,10 @@ def affine(img, angle, translate, scale, shear, resample=0, fillcolor=None): assert scale > 0.0, "Argument scale should be positive" output_size = img.size - center = (img.size[0] * 0.5 + 0.5, img.size[1] * 0.5 + 0.5) + # center = (img.size[0] * 0.5 + 0.5, img.size[1] * 0.5 + 0.5) + # it is visually better to estimate the center without 0.5 offset + # otherwise image rotated by 90 degrees is shifted 1 pixel + center = (img.size[0] * 0.5, img.size[1] * 0.5) matrix = _get_inverse_affine_matrix(center, angle, translate, scale, shear) kwargs = {"fillcolor": fillcolor} if int(PILLOW_VERSION.split('.')[0]) >= 5 else {} return img.transform(output_size, Image.AFFINE, matrix, resample, **kwargs)