From 512a923361219277f9829b946f2c54336ae1e8ae Mon Sep 17 00:00:00 2001
From: ekka
Date: Tue, 19 Mar 2019 23:37:00 +0530
Subject: [PATCH 1/5] Update functional.py

---
 torchvision/transforms/functional.py | 35 ++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py
index 71c0ff87bf5..6979452d77d 100644
--- a/torchvision/transforms/functional.py
+++ b/torchvision/transforms/functional.py
@@ -662,6 +662,41 @@ def rotate(img, angle, resample=False, expand=False, center=None):
     return img.rotate(angle, resample, expand, center)
 
 
+def translate(img, translate, resample=0, fillcolor=None):
+    """Translate the image by the given horizontal and vertical offsets.
+
+    Args:
+        img (PIL Image): PIL Image to be translated.
+        translate (list or tuple of integers): horizontal and vertical translations (tx, ty) in pixels
+        resample (``PIL.Image.NEAREST`` or ``PIL.Image.BILINEAR`` or ``PIL.Image.BICUBIC``, optional):
+            An optional resampling filter.
+            See `filters`_ for more information.
+            If omitted, or if the image has mode "1" or "P", it is set to ``PIL.Image.NEAREST``.
+        fillcolor (int): Optional fill color for the area outside the transform in the output image. (Pillow>=5.0.0)
+    """
+    if not _is_pil_image(img):
+        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+    assert isinstance(translate, (tuple, list)) and len(translate) == 2, \
+        "Argument translate should be a list or tuple of length 2"
+
+    output_size = img.size
+    # PIL's Image.transform expects the inverse mapping (output -> input coordinates)
+
+    # We therefore need the INVERSE of the affine transformation matrix: M = T,
+    # where T is the translation matrix: [1, 0, tx | 0, 1, ty | 0, 0, 1]
+    # Thus, the inverse is M^-1 = T^-1
+
+    # Inverted translation: T^-1
+    matrix = [
+        1, 0, -translate[0],
+        0, 1, -translate[1]
+    ]
+
+    kwargs = {"fillcolor": fillcolor} if PILLOW_VERSION[0] == '5' else {}
+    return img.transform(output_size, Image.AFFINE, matrix, resample, **kwargs)
+
+
 def _get_inverse_affine_matrix(center, angle, translate, scale, shear):
     # Helper method to compute inverse matrix for affine transformation
 

From 7701df3424efd5c9d6420367520902e9363f94ba Mon Sep 17 00:00:00 2001
From: ekka
Date: Tue, 19 Mar 2019 23:37:07 +0530
Subject: [PATCH 2/5] Update transforms.py

---
 torchvision/transforms/transforms.py | 66 +++++++++++++++++++++++++++-
 1 file changed, 65 insertions(+), 1 deletion(-)

diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py
index b972bb2b7c6..bdc2eba8fad 100644
--- a/torchvision/transforms/transforms.py
+++ b/torchvision/transforms/transforms.py
@@ -27,7 +27,7 @@
 __all__ = ["Compose", "ToTensor", "ToPILImage", "Normalize", "Resize", "Scale", "CenterCrop", "Pad",
            "Lambda", "RandomApply", "RandomChoice", "RandomOrder", "RandomCrop", "RandomHorizontalFlip",
            "RandomVerticalFlip", "RandomResizedCrop", "RandomSizedCrop", "FiveCrop", "TenCrop", "LinearTransformation",
-           "ColorJitter", "RandomRotation", "RandomAffine", "Grayscale", "RandomGrayscale"]
+           "ColorJitter", "RandomRotation", "RandomAffine", "Grayscale", "RandomGrayscale", "RandomTranslate"]
 
 _pil_interpolation_to_str = {
     Image.NEAREST: 'PIL.Image.NEAREST',
@@ -915,6 +915,70 @@ def __repr__(self):
         return format_string
 
 
+class RandomTranslate(object):
+    """Translate the given PIL Image by a random horizontal and vertical offset.
+
+    Args:
+        translate (tuple): tuple of maximum absolute fractions for horizontal
+            and vertical translations. For example, if translate=(a, b), then the horizontal shift
+            is randomly sampled in the range -img_width * a < dx < img_width * a and the vertical shift is
+            randomly sampled in the range -img_height * b < dy < img_height * b.
+        resample ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional):
+            An optional resampling filter. See `filters`_ for more information.
+            If omitted, or if the image has mode "1" or "P", it is set to PIL.Image.NEAREST.
+        fillcolor (int): Optional fill color for the area outside the transform in the output image. (Pillow>=5.0.0)
+
+    .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters
+
+    """
+
+    def __init__(self, translate, resample=False, fillcolor=0):
+        assert isinstance(translate, (tuple, list)) and len(translate) == 2, \
+            "translate should be a list or tuple and it must be of length 2."
+        for t in translate:
+            if not (0.0 <= t <= 1.0):
+                raise ValueError("translation values should be between 0 and 1")
+        self.translate = translate
+
+        self.resample = resample
+        self.fillcolor = fillcolor
+
+    @staticmethod
+    def get_params(translate, img_size):
+        """Get parameters for a random translation
+
+        Returns:
+            tuple: horizontal and vertical pixel translations (dx, dy) to be passed to ``F.translate``
+        """
+        max_dx = translate[0] * img_size[0]
+        max_dy = translate[1] * img_size[1]
+        translations = (np.round(random.uniform(-max_dx, max_dx)),
+                        np.round(random.uniform(-max_dy, max_dy)))
+
+        return translations
+
+    def __call__(self, img):
+        """
+            img (PIL Image): Image to be translated.
+
+        Returns:
+            PIL Image: Translated image.
+        """
+        ret = self.get_params(self.translate, img.size)
+        return F.translate(img, ret, resample=self.resample, fillcolor=self.fillcolor)
+
+    def __repr__(self):
+        s = '{name}(translate={translate}'
+        if self.resample > 0:
+            s += ', resample={resample}'
+        if self.fillcolor != 0:
+            s += ', fillcolor={fillcolor}'
+        s += ')'
+        d = dict(self.__dict__)
+        d['resample'] = _pil_interpolation_to_str[d['resample']]
+        return s.format(name=self.__class__.__name__, **d)
+
+
 class RandomAffine(object):
     """Random affine transformation of the image keeping center invariant
 

From f7658dde8b896b9f29baf2582c7e711037e0d95f Mon Sep 17 00:00:00 2001
From: ekka
Date: Wed, 20 Mar 2019 19:15:08 +0530
Subject: [PATCH 3/5] Add test

---
 test/test_transforms.py | 88 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)

diff --git a/test/test_transforms.py b/test/test_transforms.py
index fa3e8472163..e7c63eb39d1 100644
--- a/test/test_transforms.py
+++ b/test/test_transforms.py
@@ -1009,6 +1009,70 @@ def test_rotate(self):
 
         assert np.all(np.array(result_a) == np.array(result_b))
 
+    def test_translate(self):
+        input_img = np.zeros((40, 40, 3), dtype=np.uint8)
+        pts = []
+        cnt = [20, 20]
+        for pt in [(16, 16), (20, 16), (20, 20)]:
+            for i in range(-5, 5):
+                for j in range(-5, 5):
+                    input_img[pt[0] + i, pt[1] + j, :] = [255, 155, 55]
+                    pts.append((pt[0] + i, pt[1] + j))
+        pts = list(set(pts))
+
+        with self.assertRaises(TypeError):
+            F.affine(input_img, 10)
+
+        pil_img = F.to_pil_image(input_img)
+
+        def _to_3x3_inv(inv_result_matrix):
+            result_matrix = np.zeros((3, 3))
+            result_matrix[:2, :] = np.array(inv_result_matrix).reshape((2, 3))
+            result_matrix[2, 2] = 1
+            return np.linalg.inv(result_matrix)
+
+        def _test_transformation(a, t, s, sh):
+            a_rad = math.radians(a)
+            s_rad = math.radians(sh)
+            # 1) Check transformation matrix:
+            c_matrix = np.array([[1.0, 0.0, cnt[0]], [0.0, 1.0, cnt[1]], [0.0, 0.0, 1.0]])
+            c_inv_matrix = np.linalg.inv(c_matrix)
+            t_matrix = np.array([[1.0, 0.0, t[0]],
+                                 [0.0, 1.0, t[1]],
+                                 [0.0, 0.0, 1.0]])
+            r_matrix = np.array([[s * math.cos(a_rad), -s * math.sin(a_rad + s_rad), 0.0],
+                                 [s * math.sin(a_rad), s * math.cos(a_rad + s_rad), 0.0],
+                                 [0.0, 0.0, 1.0]])
+            true_matrix = np.dot(t_matrix, np.dot(c_matrix, np.dot(r_matrix, c_inv_matrix)))
+            result_matrix = _to_3x3_inv(F._get_inverse_affine_matrix(center=cnt, angle=a,
+                                                                     translate=t, scale=s, shear=sh))
+            assert np.sum(np.abs(true_matrix - result_matrix)) < 1e-10
+            # 2) Perform inverse mapping:
+            true_result = np.zeros((40, 40, 3), dtype=np.uint8)
+            inv_true_matrix = np.linalg.inv(true_matrix)
+            for y in range(true_result.shape[0]):
+                for x in range(true_result.shape[1]):
+                    res = np.dot(inv_true_matrix, [x, y, 1])
+                    _x = int(res[0] + 0.5)
+                    _y = int(res[1] + 0.5)
+                    if 0 <= _x < input_img.shape[1] and 0 <= _y < input_img.shape[0]:
+                        true_result[y, x, :] = input_img[_y, _x, :]
+
+            result = F.affine(pil_img, angle=a, translate=t, scale=s, shear=sh)
+            assert result.size == pil_img.size
+            # Compute number of different pixels:
+            np_result = np.array(result)
+            n_diff_pixels = np.sum(np_result != true_result) / 3
+            # Accept 3 wrong pixels
+            assert n_diff_pixels < 3, \
+                "a={}, t={}, s={}, sh={}\n".format(a, t, s, sh) +\
+                "n diff pixels={}\n".format(np.sum(np.array(result)[:, :, 0] != true_result[:, :, 0]))
+
+        # Test translation
+        t = [10, 15]
+        _test_transformation(a=0.0, t=t, s=1.0, sh=0.0)
+
+
     def test_affine(self):
         input_img = np.zeros((40, 40, 3), dtype=np.uint8)
         pts = []
@@ -1109,6 +1173,30 @@ def test_random_rotation(self):
         # Checking if RandomRotation can be printed as string
         t.__repr__()
 
+    def test_random_translate(self):
+
+        with self.assertRaises(ValueError):
+            transforms.RandomTranslate(translate=2.0)
+            transforms.RandomTranslate(translate=[-1.0, 1.0])
+            transforms.RandomTranslate(translate=[-1.0, 0.0, 1.0])
+
+        x = np.zeros((100, 100, 3), dtype=np.uint8)
+        img = F.to_pil_image(x)
+
+        t = transforms.RandomTranslate(translate=[0.5, 0.3])
+        for _ in range(100):
+            translations = t.get_params(t.translate, img_size=img.size)
+            assert -img.size[0] * 0.5 <= translations[0] <= img.size[0] * 0.5, \
+                "{} vs {}".format(translations[0], img.size[0] * 0.5)
+            assert -img.size[1] * 0.5 <= translations[1] <= img.size[1] * 0.5, \
+                "{} vs {}".format(translations[1], img.size[1] * 0.5)
+
+        # Checking if RandomTranslate can be printed as string
+        t.__repr__()
+
+        t = transforms.RandomTranslate(10, resample=Image.BILINEAR)
+        assert "Image.BILINEAR" in t.__repr__()
+
     def test_random_affine(self):
 
         with self.assertRaises(ValueError):

From c87aeaa30630d7582658dfaab268f378c5596796 Mon Sep 17 00:00:00 2001
From: ekka
Date: Wed, 20 Mar 2019 19:38:26 +0530
Subject: [PATCH 4/5] Update test_transforms.py

---
 test/test_transforms.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/test/test_transforms.py b/test/test_transforms.py
index e7c63eb39d1..db8702d721b 100644
--- a/test/test_transforms.py
+++ b/test/test_transforms.py
@@ -1021,7 +1021,7 @@ def test_translate(self):
         pts = list(set(pts))
 
         with self.assertRaises(TypeError):
-            F.affine(input_img, 10)
+            F.translate(input_img, (10, 20))
 
         pil_img = F.to_pil_image(input_img)
 
@@ -1058,7 +1058,7 @@ def _test_transformation(a, t, s, sh):
                     if 0 <= _x < input_img.shape[1] and 0 <= _y < input_img.shape[0]:
                         true_result[y, x, :] = input_img[_y, _x, :]
 
-            result = F.affine(pil_img, angle=a, translate=t, scale=s, shear=sh)
+            result = F.translate(pil_img, translate=t)
             assert result.size == pil_img.size
             # Compute number of different pixels:
             np_result = np.array(result)
@@ -1069,10 +1069,9 @@ def _test_transformation(a, t, s, sh):
                 "n diff pixels={}\n".format(np.sum(np.array(result)[:, :, 0] != true_result[:, :, 0]))
 
         # Test translation
-        t = [10, 15]
+        t = [10, 20]
         _test_transformation(a=0.0, t=t, s=1.0, sh=0.0)
 
-
     def test_affine(self):
         input_img = np.zeros((40, 40, 3), dtype=np.uint8)
         pts = []
@@ -1176,9 +1175,7 @@ def test_random_rotation(self):
     def test_random_translate(self):
 
         with self.assertRaises(ValueError):
-            transforms.RandomTranslate(translate=2.0)
             transforms.RandomTranslate(translate=[-1.0, 1.0])
-            transforms.RandomTranslate(translate=[-1.0, 0.0, 1.0])
 
         x = np.zeros((100, 100, 3), dtype=np.uint8)
         img = F.to_pil_image(x)

From 7cc6149a4f3f0a7968758604c2c13cf0d8bf7c82 Mon Sep 17 00:00:00 2001
From: ekka
Date: Wed, 20 Mar 2019 20:03:34 +0530
Subject: [PATCH 5/5] Update test_transforms.py

---
 test/test_transforms.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_transforms.py b/test/test_transforms.py
index db8702d721b..d1683efdd3c 100644
--- a/test/test_transforms.py
+++ b/test/test_transforms.py
@@ -1191,7 +1191,7 @@ def test_random_translate(self):
         # Checking if RandomTranslate can be printed as string
         t.__repr__()
 
-        t = transforms.RandomTranslate(10, resample=Image.BILINEAR)
+        t = transforms.RandomTranslate((0.2, 0.1), resample=Image.BILINEAR)
         assert "Image.BILINEAR" in t.__repr__()
 
     def test_random_affine(self):
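
Usage sketch (illustrative only, not part of the patches): assuming the five patches above are applied to torchvision and Pillow is installed, the new functional and transform could be exercised as follows. The image size, pixel offsets, and fractions below are arbitrary example values.

import numpy as np
from PIL import Image

import torchvision.transforms as transforms
import torchvision.transforms.functional as F

# Deterministic shift through the functional API: move the content by
# (tx, ty) = (10, 20) pixels; the output keeps the input size, and the
# uncovered area can be filled via `fillcolor` on Pillow >= 5.0.0.
img = Image.fromarray(np.zeros((100, 100, 3), dtype=np.uint8))
shifted = F.translate(img, (10, 20))
assert shifted.size == img.size

# Random shift through the transform API: each call samples dx uniformly in
# [-0.5 * width, 0.5 * width] and dy in [-0.3 * height, 0.3 * height].
random_translate = transforms.RandomTranslate(translate=(0.5, 0.3), resample=Image.BILINEAR)
out = random_translate(img)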