Skip to content

Add translate and RandomTranslate #806

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 10 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 85 additions & 0 deletions test/test_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -1009,6 +1009,69 @@ def test_rotate(self):

assert np.all(np.array(result_a) == np.array(result_b))

def test_translate(self):
    """Check ``F.translate`` against a brute-force inverse-mapping reference.

    A 40x40 test image with three overlapping coloured squares is shifted by
    a fixed offset. The expected output is built pixel by pixel from the
    analytically derived affine matrix and compared with the PIL result.
    """
    input_img = np.zeros((40, 40, 3), dtype=np.uint8)
    cnt = [20, 20]
    # Paint three overlapping 10x10 squares (rows/cols pt-5 .. pt+4).
    for pt in [(16, 16), (20, 16), (20, 20)]:
        input_img[pt[0] - 5:pt[0] + 5, pt[1] - 5:pt[1] + 5, :] = [255, 155, 55]

    # A raw numpy array is not a PIL image and must be rejected.
    with self.assertRaises(TypeError):
        F.translate(input_img, (10, 20))

    pil_img = F.to_pil_image(input_img)

    def _to_3x3_inv(inv_result_matrix):
        # Expand the flat 2x3 inverse matrix to 3x3 and invert it, which
        # yields the forward transformation matrix.
        result_matrix = np.zeros((3, 3))
        result_matrix[:2, :] = np.array(inv_result_matrix).reshape((2, 3))
        result_matrix[2, 2] = 1
        return np.linalg.inv(result_matrix)

    def _test_transformation(a, t, s, sh):
        a_rad = math.radians(a)
        s_rad = math.radians(sh)
        # 1) Check transformation matrix:
        c_matrix = np.array([[1.0, 0.0, cnt[0]], [0.0, 1.0, cnt[1]], [0.0, 0.0, 1.0]])
        c_inv_matrix = np.linalg.inv(c_matrix)
        t_matrix = np.array([[1.0, 0.0, t[0]],
                             [0.0, 1.0, t[1]],
                             [0.0, 0.0, 1.0]])
        r_matrix = np.array([[s * math.cos(a_rad), -s * math.sin(a_rad + s_rad), 0.0],
                             [s * math.sin(a_rad), s * math.cos(a_rad + s_rad), 0.0],
                             [0.0, 0.0, 1.0]])
        true_matrix = np.dot(t_matrix, np.dot(c_matrix, np.dot(r_matrix, c_inv_matrix)))
        result_matrix = _to_3x3_inv(F._get_inverse_affine_matrix(center=cnt, angle=a,
                                                                 translate=t, scale=s, shear=sh))
        assert np.sum(np.abs(true_matrix - result_matrix)) < 1e-10
        # 2) Perform inverse mapping:
        true_result = np.zeros((40, 40, 3), dtype=np.uint8)
        inv_true_matrix = np.linalg.inv(true_matrix)
        for y in range(true_result.shape[0]):
            for x in range(true_result.shape[1]):
                res = np.dot(inv_true_matrix, [x, y, 1])
                # Round to the nearest source pixel.
                _x = int(res[0] + 0.5)
                _y = int(res[1] + 0.5)
                if 0 <= _x < input_img.shape[1] and 0 <= _y < input_img.shape[0]:
                    true_result[y, x, :] = input_img[_y, _x, :]

        result = F.translate(pil_img, translate=t)
        assert result.size == pil_img.size
        # Compute number of different pixels:
        np_result = np.array(result)
        n_diff_pixels = np.sum(np_result != true_result) / 3
        # Accept fewer than 3 wrong pixels
        assert n_diff_pixels < 3, \
            "a={}, t={}, s={}, sh={}\n".format(a, t, s, sh) +\
            "n diff pixels={}\n".format(np.sum(np_result[:, :, 0] != true_result[:, :, 0]))

    # Test translation
    t = [10, 20]
    _test_transformation(a=0.0, t=t, s=1.0, sh=0.0)

def test_affine(self):
input_img = np.zeros((40, 40, 3), dtype=np.uint8)
pts = []
Expand Down Expand Up @@ -1109,6 +1172,28 @@ def test_random_rotation(self):
# Checking if RandomRotation can be printed as string
t.__repr__()

def test_random_translate(self):
    """Check RandomTranslate argument validation, sampling bounds and repr."""

    # Fractions outside [0, 1] must be rejected.
    with self.assertRaises(ValueError):
        transforms.RandomTranslate(translate=[-1.0, 1.0])

    x = np.zeros((100, 100, 3), dtype=np.uint8)
    img = F.to_pil_image(x)

    max_fractions = [0.5, 0.3]
    t = transforms.RandomTranslate(translate=max_fractions)
    # Each axis is bounded by its OWN fraction of the matching image side.
    max_dx = img.size[0] * max_fractions[0]
    max_dy = img.size[1] * max_fractions[1]
    for _ in range(100):
        translations = t.get_params(t.translate, img_size=img.size)
        assert -max_dx <= translations[0] <= max_dx, \
            "{} vs {}".format(translations[0], max_dx)
        assert -max_dy <= translations[1] <= max_dy, \
            "{} vs {}".format(translations[1], max_dy)

    # Checking if RandomTranslate can be printed as string
    t.__repr__()

    t = transforms.RandomTranslate((0.2, 0.1), resample=Image.BILINEAR)
    assert "Image.BILINEAR" in t.__repr__()

def test_random_affine(self):

with self.assertRaises(ValueError):
Expand Down
35 changes: 35 additions & 0 deletions torchvision/transforms/functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -662,6 +662,41 @@ def rotate(img, angle, resample=False, expand=False, center=None):
return img.rotate(angle, resample, expand, center)


def translate(img, translate, resample=0, fillcolor=None):
    """Apply translation on the image

    Args:
        img (PIL Image): PIL Image to be translated.
        translate (list or tuple of integers): horizontal and vertical translations
        resample (``PIL.Image.NEAREST`` or ``PIL.Image.BILINEAR`` or ``PIL.Image.BICUBIC``, optional):
            An optional resampling filter.
            See `filters`_ for more information.
            If omitted, or if the image has mode "1" or "P", it is set to ``PIL.Image.NEAREST``.
        fillcolor (int): Optional fill color for the area outside the transform in the output image. (Pillow>=5.0.0)

    Returns:
        PIL Image: Translated image with the same size as ``img``.

    Raises:
        TypeError: If ``img`` is not a PIL Image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    assert isinstance(translate, (tuple, list)) and len(translate) == 2, \
        "Argument translate should be a list or tuple of length 2"

    output_size = img.size

    # We need compute INVERSE of affine transformation matrix: M = T
    # where T is translation matrix: [1, 0, tx | 0, 1, ty | 0, 0, 1]
    # Thus, the inverse is M^-1 = T^-1

    # Inverted translation: T^-1
    matrix = [
        1, 0, -translate[0],
        0, 1, -translate[1],
    ]

    # ``fillcolor`` is supported from Pillow 5.0.0 onwards. Compare the major
    # version numerically so that Pillow 6, 7, ... (and 10+) keep the fill color.
    pillow_major = int(PILLOW_VERSION.split('.')[0])
    kwargs = {"fillcolor": fillcolor} if pillow_major >= 5 else {}
    return img.transform(output_size, Image.AFFINE, matrix, resample, **kwargs)


def _get_inverse_affine_matrix(center, angle, translate, scale, shear):
# Helper method to compute inverse matrix for affine transformation

Expand Down
66 changes: 65 additions & 1 deletion torchvision/transforms/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
__all__ = ["Compose", "ToTensor", "ToPILImage", "Normalize", "Resize", "Scale", "CenterCrop", "Pad",
"Lambda", "RandomApply", "RandomChoice", "RandomOrder", "RandomCrop", "RandomHorizontalFlip",
"RandomVerticalFlip", "RandomResizedCrop", "RandomSizedCrop", "FiveCrop", "TenCrop", "LinearTransformation",
"ColorJitter", "RandomRotation", "RandomAffine", "Grayscale", "RandomGrayscale"]
"ColorJitter", "RandomRotation", "RandomAffine", "Grayscale", "RandomGrayscale", "RandomTranslate"]

_pil_interpolation_to_str = {
Image.NEAREST: 'PIL.Image.NEAREST',
Expand Down Expand Up @@ -915,6 +915,70 @@ def __repr__(self):
return format_string


class RandomTranslate(object):
    """Shift the image by a randomly drawn horizontal and vertical offset.

    Args:
        translate (tuple or list): pair ``(a, b)`` of maximum absolute fractions,
            each between 0 and 1. The horizontal shift is drawn from the range
            ``[-img_width * a, img_width * a]`` and the vertical shift from
            ``[-img_height * b, img_height * b]``; both are rounded to whole pixels.
        resample ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional):
            An optional resampling filter. See `filters`_ for more information.
            If omitted, or if the image has mode "1" or "P", it is set to PIL.Image.NEAREST.
        fillcolor (int): Optional fill color for the area outside the transform in the output image. (Pillow>=5.0.0)

    .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters

    """

    def __init__(self, translate, resample=False, fillcolor=0):
        assert isinstance(translate, (tuple, list)) and len(translate) == 2, \
            "translate should be a list or tuple and it must be of length 2."
        if any(not (0.0 <= fraction <= 1.0) for fraction in translate):
            raise ValueError("translation values should be between 0 and 1")

        self.translate = translate
        self.resample = resample
        self.fillcolor = fillcolor

    @staticmethod
    def get_params(translate, img_size):
        """Draw a random pixel offset for the given image size.

        Args:
            translate (sequence): maximum absolute shift fractions (horizontal, vertical).
            img_size (sequence): image size as (width, height).

        Returns:
            sequence: rounded (dx, dy) offsets to be passed to the translation
        """
        limit_x = translate[0] * img_size[0]
        limit_y = translate[1] * img_size[1]
        # Horizontal offset is drawn first, then vertical.
        shift_x = np.round(random.uniform(-limit_x, limit_x))
        shift_y = np.round(random.uniform(-limit_y, limit_y))
        return (shift_x, shift_y)

    def __call__(self, img):
        """
        Args:
            img (PIL Image): Image to be translated.

        Returns:
            PIL Image: Translated image.
        """
        offsets = self.get_params(self.translate, img.size)
        return F.translate(img, offsets, resample=self.resample, fillcolor=self.fillcolor)

    def __repr__(self):
        # Only non-default resample / fillcolor settings are shown.
        pieces = ['{name}(translate={translate}']
        if self.resample > 0:
            pieces.append(', resample={resample}')
        if self.fillcolor != 0:
            pieces.append(', fillcolor={fillcolor}')
        pieces.append(')')

        fields = dict(self.__dict__)
        # Show the PIL filter by its readable name instead of its integer code.
        fields['resample'] = _pil_interpolation_to_str[fields['resample']]
        return ''.join(pieces).format(name=self.__class__.__name__, **fields)


class RandomAffine(object):
"""Random affine transformation of the image keeping center invariant

Expand Down