From 905431b7d4d74f0e581b1b3475c1460cae06528a Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Mon, 6 Jul 2020 11:30:00 +0200
Subject: [PATCH 1/6] [WIP] Unify random resized crop

---
 test/test_transforms_tensor.py       |  3 +++
 torchvision/transforms/functional.py | 16 +++++++++-------
 torchvision/transforms/transforms.py | 15 +++++++++------
 3 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/test/test_transforms_tensor.py b/test/test_transforms_tensor.py
index 6a8d9930754..dc3114ec494 100644
--- a/test/test_transforms_tensor.py
+++ b/test/test_transforms_tensor.py
@@ -217,6 +217,9 @@ def test_ten_crop(self):
             "ten_crop", "TenCrop", out_length=10, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs
         )
 
+    def test_resized_crop(self):
+        pass
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py
index 9c7efe0ef53..b594da343e0 100644
--- a/torchvision/transforms/functional.py
+++ b/torchvision/transforms/functional.py
@@ -451,24 +451,26 @@ def center_crop(img: Tensor, output_size: List[int]) -> Tensor:
     return crop(img, crop_top, crop_left, crop_height, crop_width)
 
-def resized_crop(img, top, left, height, width, size, interpolation=Image.BILINEAR):
-    """Crop the given PIL Image and resize it to desired size.
+def resized_crop(
+        img: Tensor, top: int, left: int, height: int, width: int, size: List[int], interpolation: int = Image.BILINEAR
+) -> Tensor:
+    """Crop the given image and resize it to desired size.
+    The image can be a PIL Image or a Tensor, in which case it is expected
+    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
 
     Notably used in :class:`~torchvision.transforms.RandomResizedCrop`.
 
     Args:
-        img (PIL Image): Image to be cropped. (0,0) denotes the top left corner of the image.
+        img (PIL Image or Tensor): Image to be cropped. (0,0) denotes the top left corner of the image.
         top (int): Vertical component of the top left corner of the crop box.
         left (int): Horizontal component of the top left corner of the crop box.
         height (int): Height of the crop box.
         width (int): Width of the crop box.
         size (sequence or int): Desired output size. Same semantics as ``resize``.
-        interpolation (int, optional): Desired interpolation. Default is
-            ``PIL.Image.BILINEAR``.
+        interpolation (int, optional): Desired interpolation. Default is ``PIL.Image.BILINEAR``.
 
     Returns:
-        PIL Image: Cropped image.
+        PIL Image or Tensor: Cropped image.
     """
-    assert F_pil._is_pil_image(img), 'img should be PIL Image'
     img = crop(img, top, left, height, width)
     img = resize(img, size, interpolation)
     return img
diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py
index 6bc9e7cbc4d..fa782e2e0d8 100644
--- a/torchvision/transforms/transforms.py
+++ b/torchvision/transforms/transforms.py
@@ -680,8 +680,10 @@ def __repr__(self):
         return self.__class__.__name__ + '(p={})'.format(self.p)
 
 
-class RandomResizedCrop(object):
-    """Crop the given PIL Image to random size and aspect ratio.
+class RandomResizedCrop(torch.nn.Module):
+    """Crop the given image to random size and aspect ratio.
+    The image can be a PIL Image or a Tensor, in which case it is expected
+    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
 
     A crop of random size (default: of 0.08 to 1.0) of the original size and a random
     aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. This crop
@@ -689,10 +691,11 @@ class RandomResizedCrop(object):
     This is popularly used to train the Inception networks.
 
     Args:
-        size: expected output size of each edge
-        scale: range of size of the origin size cropped
-        ratio: range of aspect ratio of the origin aspect ratio cropped
-        interpolation: Default: PIL.Image.BILINEAR
+        size (int or sequence): expected output size of each edge. If provided a tuple or list of length 1,
+            it will be interpreted as (size[0], size[0]).
+        scale (): range of size of the origin size cropped
+        ratio (): range of aspect ratio of the origin aspect ratio cropped.
+        interpolation (int): Desired interpolation. Default: ``PIL.Image.BILINEAR``
    """
 
     def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), interpolation=Image.BILINEAR):
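Note on PATCH 1/6 (an illustrative sketch, not part of the patch series): dropping the `_is_pil_image` assert is what unifies the functional API here, since `resized_crop` only delegates to `crop` and `resize`, which already handle both input types. A minimal sketch of the intended behavior, assuming a torchvision build with this patch applied:

    import torch
    from PIL import Image
    from torchvision.transforms import functional as F

    tensor_img = torch.rand(3, 64, 64)          # [..., H, W] tensor image
    pil_img = Image.new("RGB", (64, 64))        # equivalent PIL image

    # The same call now works for both input types; a tensor stays a tensor.
    out_t = F.resized_crop(tensor_img, top=8, left=8, height=48, width=48, size=[32, 32])
    out_p = F.resized_crop(pil_img, 8, 8, 48, 48, [32, 32])
    assert isinstance(out_t, torch.Tensor) and out_t.shape == (3, 32, 32)
    assert out_p.size == (32, 32)               # PIL reports (width, height)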
From 20e74da4411be07e2c255d671f86b0d904e2bafe Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Mon, 6 Jul 2020 12:47:41 +0200
Subject: [PATCH 2/6] Unify input for RandomResizedCrop

---
 test/test_transforms_tensor.py       | 17 ++++++++-
 torchvision/transforms/transforms.py | 56 +++++++++++++++++-----------
 2 files changed, 51 insertions(+), 22 deletions(-)

diff --git a/test/test_transforms_tensor.py b/test/test_transforms_tensor.py
index dc3114ec494..672eb84b629 100644
--- a/test/test_transforms_tensor.py
+++ b/test/test_transforms_tensor.py
@@ -2,6 +2,7 @@
 from torchvision import transforms as T
 from torchvision.transforms import functional as F
 from PIL import Image
+from PIL.Image import NEAREST, BILINEAR, BICUBIC
 
 import numpy as np
 
@@ -218,7 +219,21 @@ def test_ten_crop(self):
         )
 
     def test_resized_crop(self):
-        pass
+        tensor = torch.randint(0, 255, size=(3, 44, 56), dtype=torch.uint8)
+
+        scale = (0.7, 1.2)
+        ratio = (0.75, 1.333)
+
+        for size in [(32, ), [32, ], [32, 32], (32, 32)]:
+            for interpolation in [NEAREST, BILINEAR, BICUBIC]:
+                transform = T.RandomResizedCrop(
+                    size=size, scale=scale, ratio=ratio, interpolation=interpolation
+                )
+                s_transform = torch.jit.script(transform)
+
+                out1 = transform(tensor)
+                out2 = s_transform(tensor)
+                self.assertTrue(out1.equal(out2))
 
 
 if __name__ == '__main__':
diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py
index fa782e2e0d8..a2568345bd6 100644
--- a/torchvision/transforms/transforms.py
+++ b/torchvision/transforms/transforms.py
@@ -691,32 +691,44 @@ class RandomResizedCrop(torch.nn.Module):
     This is popularly used to train the Inception networks.
 
     Args:
-        size (int or sequence): expected output size of each edge. If provided a tuple or list of length 1,
-            it will be interpreted as (size[0], size[0]).
-        scale (): range of size of the origin size cropped
-        ratio (): range of aspect ratio of the origin aspect ratio cropped.
+        size (int or sequence): expected output size of each edge. If size is an
+            int instead of sequence like (h, w), a square output size ``(size, size)`` is
+            made. If provided a tuple or list of length 1, it will be interpreted as (size[0], size[0]).
+        scale (tuple of float): range of size of the origin size cropped
+        ratio (tuple of float): range of aspect ratio of the origin aspect ratio cropped.
         interpolation (int): Desired interpolation. Default: ``PIL.Image.BILINEAR``
     """
 
     def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), interpolation=Image.BILINEAR):
-        if isinstance(size, (tuple, list)):
-            self.size = size
+        super().__init__()
+        if isinstance(size, numbers.Number):
+            self.size = (int(size), int(size))
+        elif isinstance(size, Sequence) and len(size) == 1:
+            self.size = (size[0], size[0])
         else:
-            self.size = (size, size)
+            if len(size) != 2:
+                raise ValueError("Please provide only two dimensions (h, w) for size.")
+
+        if not isinstance(scale, (tuple, list)):
+            raise TypeError("Scale should be a sequence")
+        if not isinstance(ratio, (tuple, list)):
+            raise TypeError("Ratio should be a sequence")
         if (scale[0] > scale[1]) or (ratio[0] > ratio[1]):
-            warnings.warn("range should be of kind (min, max)")
+            warnings.warn("Scale and ratio should be of kind (min, max)")
 
         self.interpolation = interpolation
         self.scale = scale
         self.ratio = ratio
 
     @staticmethod
-    def get_params(img, scale, ratio):
+    def get_params(
+            img: Tensor, scale: Tuple[float, float], ratio: Tuple[float, float]
+    ) -> Tuple[int, int, int, int]:
         """Get parameters for ``crop`` for a random sized crop.
 
         Args:
-            img (PIL Image): Image to be cropped.
-            scale (tuple): range of size of the origin size cropped
+            img (PIL Image or Tensor): Input image.
+            scale (tuple): range of scale of the origin size cropped
             ratio (tuple): range of aspect ratio of the origin aspect ratio cropped
 
         Returns:
@@ -727,24 +739,26 @@ def get_params(img, scale, ratio):
         area = height * width
 
         for _ in range(10):
-            target_area = random.uniform(*scale) * area
-            log_ratio = (math.log(ratio[0]), math.log(ratio[1]))
-            aspect_ratio = math.exp(random.uniform(*log_ratio))
+            target_area = area * torch.empty(1).uniform_(*scale).item()
+            log_ratio = torch.log(torch.tensor(ratio))
+            aspect_ratio = torch.exp(
+                torch.empty(1).uniform_(log_ratio[0], log_ratio[1])
+            ).item()
 
             w = int(round(math.sqrt(target_area * aspect_ratio)))
             h = int(round(math.sqrt(target_area / aspect_ratio)))
 
             if 0 < w <= width and 0 < h <= height:
-                i = random.randint(0, height - h)
-                j = random.randint(0, width - w)
+                i = torch.randint(0, height - h, size=(1,)).item()
+                j = torch.randint(0, width - w, size=(1,)).item()
                 return i, j, h, w
 
         # Fallback to central crop
         in_ratio = float(width) / float(height)
-        if (in_ratio < min(ratio)):
+        if in_ratio < min(ratio):
             w = width
             h = int(round(w / min(ratio)))
-        elif (in_ratio > max(ratio)):
+        elif in_ratio > max(ratio):
             h = height
             w = int(round(h * max(ratio)))
         else:  # whole image
@@ -754,13 +768,13 @@ def get_params(img, scale, ratio):
             j = (width - w) // 2
         return i, j, h, w
 
-    def __call__(self, img):
+    def forward(self, img):
         """
         Args:
-            img (PIL Image): Image to be cropped and resized.
+            img (PIL Image or Tensor): Image to be cropped and resized.
 
         Returns:
-            PIL Image: Randomly cropped and resized image.
+            PIL Image or Tensor: Randomly cropped and resized image.
         """
         i, j, h, w = self.get_params(img, self.scale, self.ratio)
         return F.resized_crop(img, i, j, h, w, self.size, self.interpolation)
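Note on PATCH 2/6 (an illustrative sketch, not part of the patch series): the switch from the `random` module to `torch.empty(1).uniform_()` / `torch.randint()` is what makes `get_params` TorchScript-compatible and puts the sampling under `torch.manual_seed`. A standalone sketch of the sampling step; the helper name is illustrative, not taken from the patch:

    import math
    import torch

    def sample_crop_hw(height, width, scale, ratio):
        # Draw a target area, then a log-uniform aspect ratio, as the
        # patched get_params does on each of its 10 attempts.
        area = height * width
        target_area = area * torch.empty(1).uniform_(scale[0], scale[1]).item()
        log_ratio = torch.log(torch.tensor(ratio))
        aspect_ratio = torch.exp(
            torch.empty(1).uniform_(float(log_ratio[0]), float(log_ratio[1]))
        ).item()
        w = int(round(math.sqrt(target_area * aspect_ratio)))
        h = int(round(math.sqrt(target_area / aspect_ratio)))
        return h, w

    torch.manual_seed(12)
    print(sample_crop_hw(44, 56, scale=(0.7, 1.2), ratio=(0.75, 1.333)))

Sampling the aspect ratio in log space keeps a ratio r and its inverse 1/r equally likely.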
From 72b3757ced8ca194173f562d502ae67a3757fdab Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Mon, 6 Jul 2020 16:02:36 +0200
Subject: [PATCH 3/6] Fixed bugs and updated test

---
 test/test_transforms_tensor.py       | 2 ++
 torchvision/transforms/transforms.py | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/test/test_transforms_tensor.py b/test/test_transforms_tensor.py
index 210c1305872..fbd3331a490 100644
--- a/test/test_transforms_tensor.py
+++ b/test/test_transforms_tensor.py
@@ -258,7 +258,9 @@ def test_resized_crop(self):
                 )
                 s_transform = torch.jit.script(transform)
 
+                torch.manual_seed(12)
                 out1 = transform(tensor)
+                torch.manual_seed(12)
                 out2 = s_transform(tensor)
                 self.assertTrue(out1.equal(out2))
 
diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py
index c877e4ff84b..44ac38b5f43 100644
--- a/torchvision/transforms/transforms.py
+++ b/torchvision/transforms/transforms.py
@@ -715,6 +715,7 @@ def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), interpolat
         else:
             if len(size) != 2:
                 raise ValueError("Please provide only two dimensions (h, w) for size.")
+            self.size = size
 
         if not isinstance(scale, (tuple, list)):
             raise TypeError("Scale should be a sequence")
@@ -755,7 +756,7 @@ def get_params(
             w = int(round(math.sqrt(target_area * aspect_ratio)))
             h = int(round(math.sqrt(target_area / aspect_ratio)))
 
-            if 0 < w <= width and 0 < h <= height:
+            if 0 < w < width and 0 < h < height:
                 i = torch.randint(0, height - h, size=(1,)).item()
                 j = torch.randint(0, width - w, size=(1,)).item()
                 return i, j, h, w
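Note on PATCH 3/6 (an illustrative sketch, not part of the patch series): the added `torch.manual_seed(12)` calls are what make the eager-vs-scripted comparison meaningful. Both modules draw their crop parameters from torch's global RNG, so each call must start from the same seed to produce the same crop. A minimal sketch, assuming the patched torchvision:

    import torch
    from torchvision import transforms as T

    transform = T.RandomResizedCrop(size=32)
    img = torch.randint(0, 256, (3, 44, 56), dtype=torch.uint8)

    torch.manual_seed(12)
    out1 = transform(img)
    torch.manual_seed(12)
    out2 = transform(img)  # same seed, same (i, j, h, w), same output
    assert out1.equal(out2)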
""" i, j, h, w = self.get_params(img, self.scale, self.ratio) return F.resized_crop(img, i, j, h, w, self.size, self.interpolation) From 72b3757ced8ca194173f562d502ae67a3757fdab Mon Sep 17 00:00:00 2001 From: vfdev-5 Date: Mon, 6 Jul 2020 16:02:36 +0200 Subject: [PATCH 3/6] Fixed bugs and updated test --- test/test_transforms_tensor.py | 2 ++ torchvision/transforms/transforms.py | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/test/test_transforms_tensor.py b/test/test_transforms_tensor.py index 210c1305872..fbd3331a490 100644 --- a/test/test_transforms_tensor.py +++ b/test/test_transforms_tensor.py @@ -258,7 +258,9 @@ def test_resized_crop(self): ) s_transform = torch.jit.script(transform) + torch.manual_seed(12) out1 = transform(tensor) + torch.manual_seed(12) out2 = s_transform(tensor) self.assertTrue(out1.equal(out2)) diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py index c877e4ff84b..44ac38b5f43 100644 --- a/torchvision/transforms/transforms.py +++ b/torchvision/transforms/transforms.py @@ -715,6 +715,7 @@ def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), interpolat else: if len(size) != 2: raise ValueError("Please provide only two dimensions (h, w) for size.") + self.size = size if not isinstance(scale, (tuple, list)): raise TypeError("Scale should be a sequence") @@ -755,7 +756,7 @@ def get_params( w = int(round(math.sqrt(target_area * aspect_ratio))) h = int(round(math.sqrt(target_area / aspect_ratio))) - if 0 < w <= width and 0 < h <= height: + if 0 < w < width and 0 < h < height: i = torch.randint(0, height - h, size=(1,)).item() j = torch.randint(0, width - w, size=(1,)).item() return i, j, h, w From 6d603b40f77a20aa37e1b666d1d96e7c9ee1706b Mon Sep 17 00:00:00 2001 From: vfdev-5 Date: Tue, 7 Jul 2020 11:19:16 +0200 Subject: [PATCH 4/6] Added resized crop functional test - fixed bug with size convention --- test/test_functional_tensor.py | 17 +++++++++++++++++ torchvision/transforms/functional_tensor.py | 2 +- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/test/test_functional_tensor.py b/test/test_functional_tensor.py index cd3ae5a0a82..95f7383a4f7 100644 --- a/test/test_functional_tensor.py +++ b/test/test_functional_tensor.py @@ -331,6 +331,23 @@ def test_resize(self): pad_tensor_script = script_fn(tensor, size=script_size, interpolation=interpolation) self.assertTrue(resized_tensor.equal(pad_tensor_script), msg="{}, {}".format(size, interpolation)) + def test_resized_crop(self): + # test values of F.resized_crop in several cases: + # 1) resize to the same size, crop to the same size => should be identity + tensor, _ = self._create_data(26, 36) + for i in [0, 2, 3]: + out_tensor = F.resized_crop(tensor, top=0, left=0, height=26, width=36, size=[26, 36], interpolation=i) + self.assertTrue(tensor.equal(out_tensor), msg="{} vs {}".format(out_tensor[0, :5, :5], tensor[0, :5, :5])) + + # 2) resize by half and crop a TL corner + tensor, _ = self._create_data(26, 36) + out_tensor = F.resized_crop(tensor, top=0, left=0, height=20, width=30, size=[10, 15], interpolation=0) + expected_out_tensor = tensor[:, :20:2, :30:2] + self.assertTrue( + expected_out_tensor.equal(out_tensor), + msg="{} vs {}".format(expected_out_tensor[0, :10, :10], out_tensor[0, :10, :10]) + ) + if __name__ == '__main__': unittest.main() diff --git a/torchvision/transforms/functional_tensor.py b/torchvision/transforms/functional_tensor.py index be0b7b3a622..59cf6bc2764 100644 --- 
From 08caaedbee810c704d0be8e6dd5108588c91669f Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Tue, 7 Jul 2020 12:58:35 +0200
Subject: [PATCH 5/6] Fixed incoherent sampling

---
 torchvision/transforms/transforms.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py
index 44ac38b5f43..b17a7bda6aa 100644
--- a/torchvision/transforms/transforms.py
+++ b/torchvision/transforms/transforms.py
@@ -756,9 +756,9 @@ def get_params(
             w = int(round(math.sqrt(target_area * aspect_ratio)))
             h = int(round(math.sqrt(target_area / aspect_ratio)))
 
-            if 0 < w < width and 0 < h < height:
-                i = torch.randint(0, height - h, size=(1,)).item()
-                j = torch.randint(0, width - w, size=(1,)).item()
+            if 0 < w <= width and 0 < h <= height:
+                i = torch.randint(0, max(height - h, 1), size=(1,)).item()
+                j = torch.randint(0, max(width - w, 1), size=(1,)).item()
                 return i, j, h, w
 
         # Fallback to central crop

From d7ed08f4926dff934cc627c8edfc3b068dffb6ce Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Tue, 7 Jul 2020 15:46:34 +0200
Subject: [PATCH 6/6] Fixed torch randint review remark

---
 torchvision/transforms/transforms.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py
index b17a7bda6aa..2df2befcb33 100644
--- a/torchvision/transforms/transforms.py
+++ b/torchvision/transforms/transforms.py
@@ -757,8 +757,8 @@ def get_params(
             h = int(round(math.sqrt(target_area / aspect_ratio)))
 
             if 0 < w <= width and 0 < h <= height:
-                i = torch.randint(0, max(height - h, 1), size=(1,)).item()
-                j = torch.randint(0, max(width - w, 1), size=(1,)).item()
+                i = torch.randint(0, height - h + 1, size=(1,)).item()
+                j = torch.randint(0, width - w + 1, size=(1,)).item()
                 return i, j, h, w
 
         # Fallback to central crop
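Note on PATCHES 5/6 and 6/6 (an illustrative sketch, not part of the patch series): with the restored `0 < w <= width` condition the crop may span the whole image, making `width - w` zero, and `torch.randint` treats its upper bound as exclusive. Patch 5 works around the empty range with `max(..., 1)`; patch 6 settles on the cleaner `+ 1`, which also lets the maximal offset actually be drawn. A minimal sketch of the boundary case:

    import torch

    height, h = 44, 44  # the sampled crop spans the full height
    # torch.randint(0, height - h, size=(1,)) would raise: empty range [0, 0)
    i = torch.randint(0, height - h + 1, size=(1,)).item()
    assert i == 0  # the only valid vertical offset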