From 905431b7d4d74f0e581b1b3475c1460cae06528a Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Mon, 6 Jul 2020 11:30:00 +0200
Subject: [PATCH 1/6] [WIP] Unify random resized crop

---
 test/test_transforms_tensor.py       |  3 +++
 torchvision/transforms/functional.py | 16 +++++++++-------
 torchvision/transforms/transforms.py | 15 +++++++++------
 3 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/test/test_transforms_tensor.py b/test/test_transforms_tensor.py
index 6a8d9930754..dc3114ec494 100644
--- a/test/test_transforms_tensor.py
+++ b/test/test_transforms_tensor.py
@@ -217,6 +217,9 @@ def test_ten_crop(self):
             "ten_crop", "TenCrop", out_length=10, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs
         )
 
+    def test_resized_crop(self):
+        pass
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py
index 9c7efe0ef53..b594da343e0 100644
--- a/torchvision/transforms/functional.py
+++ b/torchvision/transforms/functional.py
@@ -451,24 +451,26 @@ def center_crop(img: Tensor, output_size: List[int]) -> Tensor:
     return crop(img, crop_top, crop_left, crop_height, crop_width)
 
-def resized_crop(img, top, left, height, width, size, interpolation=Image.BILINEAR):
-    """Crop the given PIL Image and resize it to desired size.
+def resized_crop(
+        img: Tensor, top: int, left: int, height: int, width: int, size: List[int], interpolation: int = Image.BILINEAR
+) -> Tensor:
+    """Crop the given image and resize it to desired size.
+    The image can be a PIL Image or a Tensor, in which case it is expected
+    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
 
     Notably used in :class:`~torchvision.transforms.RandomResizedCrop`.
 
     Args:
-        img (PIL Image): Image to be cropped. (0,0) denotes the top left corner of the image.
+        img (PIL Image or Tensor): Image to be cropped. (0,0) denotes the top left corner of the image.
         top (int): Vertical component of the top left corner of the crop box.
         left (int): Horizontal component of the top left corner of the crop box.
         height (int): Height of the crop box.
         width (int): Width of the crop box.
         size (sequence or int): Desired output size. Same semantics as ``resize``.
-        interpolation (int, optional): Desired interpolation. Default is
-            ``PIL.Image.BILINEAR``.
+        interpolation (int, optional): Desired interpolation. Default is ``PIL.Image.BILINEAR``.
 
     Returns:
-        PIL Image: Cropped image.
+        PIL Image or Tensor: Cropped image.
     """
-    assert F_pil._is_pil_image(img), 'img should be PIL Image'
     img = crop(img, top, left, height, width)
     img = resize(img, size, interpolation)
     return img
diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py
index 6bc9e7cbc4d..fa782e2e0d8 100644
--- a/torchvision/transforms/transforms.py
+++ b/torchvision/transforms/transforms.py
@@ -680,8 +680,10 @@ def __repr__(self):
         return self.__class__.__name__ + '(p={})'.format(self.p)
 
 
-class RandomResizedCrop(object):
-    """Crop the given PIL Image to random size and aspect ratio.
+class RandomResizedCrop(torch.nn.Module):
+    """Crop the given image to random size and aspect ratio.
+    The image can be a PIL Image or a Tensor, in which case it is expected
+    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
 
     A crop of random size (default: of 0.08 to 1.0) of the original size and a random
     aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. This crop
@@ -689,10 +691,11 @@ class RandomResizedCrop(object):
     This is popularly used to train the Inception networks.
 
     Args:
-        size: expected output size of each edge
-        scale: range of size of the origin size cropped
-        ratio: range of aspect ratio of the origin aspect ratio cropped
-        interpolation: Default: PIL.Image.BILINEAR
+        size (int or sequence): expected output size of each edge. If provided a tuple or list of length 1,
+            it will be interpreted as (size[0], size[0]).
+        scale (): range of size of the origin size cropped
+        ratio (): range of aspect ratio of the origin aspect ratio cropped.
+        interpolation (int): Desired interpolation. Default: ``PIL.Image.BILINEAR``
    """
 
     def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), interpolation=Image.BILINEAR):
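Note on PATCH 1/6 (an illustrative sketch, not part of the patch series): dropping the `_is_pil_image` assert is what unifies the functional API here, since `resized_crop` only delegates to `crop` and `resize`, which already handle both input types. A minimal sketch of the intended behavior, assuming a torchvision build with this patch applied:

    import torch
    from PIL import Image
    from torchvision.transforms import functional as F

    tensor_img = torch.rand(3, 64, 64)          # [..., H, W] tensor image
    pil_img = Image.new("RGB", (64, 64))        # equivalent PIL image

    # The same call now works for both input types; a tensor stays a tensor.
    out_t = F.resized_crop(tensor_img, top=8, left=8, height=48, width=48, size=[32, 32])
    out_p = F.resized_crop(pil_img, 8, 8, 48, 48, [32, 32])
    assert isinstance(out_t, torch.Tensor) and out_t.shape == (3, 32, 32)
    assert out_p.size == (32, 32)               # PIL reports (width, height)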
From 20e74da4411be07e2c255d671f86b0d904e2bafe Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Mon, 6 Jul 2020 12:47:41 +0200
Subject: [PATCH 2/6] Unify input for RandomResizedCrop

---
 test/test_transforms_tensor.py       | 17 ++++++++-
 torchvision/transforms/transforms.py | 56 +++++++++++++++++-----------
 2 files changed, 51 insertions(+), 22 deletions(-)

diff --git a/test/test_transforms_tensor.py b/test/test_transforms_tensor.py
index dc3114ec494..672eb84b629 100644
--- a/test/test_transforms_tensor.py
+++ b/test/test_transforms_tensor.py
@@ -2,6 +2,7 @@
 from torchvision import transforms as T
 from torchvision.transforms import functional as F
 from PIL import Image
+from PIL.Image import NEAREST, BILINEAR, BICUBIC
 
 import numpy as np
 
@@ -218,7 +219,21 @@ def test_ten_crop(self):
         )
 
     def test_resized_crop(self):
-        pass
+        tensor = torch.randint(0, 255, size=(3, 44, 56), dtype=torch.uint8)
+
+        scale = (0.7, 1.2)
+        ratio = (0.75, 1.333)
+
+        for size in [(32, ), [32, ], [32, 32], (32, 32)]:
+            for interpolation in [NEAREST, BILINEAR, BICUBIC]:
+                transform = T.RandomResizedCrop(
+                    size=size, scale=scale, ratio=ratio, interpolation=interpolation
+                )
+                s_transform = torch.jit.script(transform)
+
+                out1 = transform(tensor)
+                out2 = s_transform(tensor)
+                self.assertTrue(out1.equal(out2))
 
 
 if __name__ == '__main__':
diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py
index fa782e2e0d8..a2568345bd6 100644
--- a/torchvision/transforms/transforms.py
+++ b/torchvision/transforms/transforms.py
@@ -691,32 +691,44 @@ class RandomResizedCrop(torch.nn.Module):
     This is popularly used to train the Inception networks.
 
     Args:
-        size (int or sequence): expected output size of each edge. If provided a tuple or list of length 1,
-            it will be interpreted as (size[0], size[0]).
-        scale (): range of size of the origin size cropped
-        ratio (): range of aspect ratio of the origin aspect ratio cropped.
+        size (int or sequence): expected output size of each edge. If size is an
+            int instead of sequence like (h, w), a square output size ``(size, size)`` is
+            made. If provided a tuple or list of length 1, it will be interpreted as (size[0], size[0]).
+        scale (tuple of float): range of size of the origin size cropped
+        ratio (tuple of float): range of aspect ratio of the origin aspect ratio cropped.
         interpolation (int): Desired interpolation. Default: ``PIL.Image.BILINEAR``
     """
 
     def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), interpolation=Image.BILINEAR):
-        if isinstance(size, (tuple, list)):
-            self.size = size
+        super().__init__()
+        if isinstance(size, numbers.Number):
+            self.size = (int(size), int(size))
+        elif isinstance(size, Sequence) and len(size) == 1:
+            self.size = (size[0], size[0])
         else:
-            self.size = (size, size)
+            if len(size) != 2:
+                raise ValueError("Please provide only two dimensions (h, w) for size.")
+
+        if not isinstance(scale, (tuple, list)):
+            raise TypeError("Scale should be a sequence")
+        if not isinstance(ratio, (tuple, list)):
+            raise TypeError("Ratio should be a sequence")
         if (scale[0] > scale[1]) or (ratio[0] > ratio[1]):
-            warnings.warn("range should be of kind (min, max)")
+            warnings.warn("Scale and ratio should be of kind (min, max)")
 
         self.interpolation = interpolation
         self.scale = scale
         self.ratio = ratio
 
     @staticmethod
-    def get_params(img, scale, ratio):
+    def get_params(
+            img: Tensor, scale: Tuple[float, float], ratio: Tuple[float, float]
+    ) -> Tuple[int, int, int, int]:
         """Get parameters for ``crop`` for a random sized crop.
 
         Args:
-            img (PIL Image): Image to be cropped.
-            scale (tuple): range of size of the origin size cropped
+            img (PIL Image or Tensor): Input image.
+            scale (tuple): range of scale of the origin size cropped
             ratio (tuple): range of aspect ratio of the origin aspect ratio cropped
 
         Returns:
@@ -727,24 +739,26 @@ def get_params(img, scale, ratio):
         area = height * width
 
         for _ in range(10):
-            target_area = random.uniform(*scale) * area
-            log_ratio = (math.log(ratio[0]), math.log(ratio[1]))
-            aspect_ratio = math.exp(random.uniform(*log_ratio))
+            target_area = area * torch.empty(1).uniform_(*scale).item()
+            log_ratio = torch.log(torch.tensor(ratio))
+            aspect_ratio = torch.exp(
+                torch.empty(1).uniform_(log_ratio[0], log_ratio[1])
+            ).item()
 
             w = int(round(math.sqrt(target_area * aspect_ratio)))
             h = int(round(math.sqrt(target_area / aspect_ratio)))
 
             if 0 < w <= width and 0 < h <= height:
-                i = random.randint(0, height - h)
-                j = random.randint(0, width - w)
+                i = torch.randint(0, height - h, size=(1,)).item()
+                j = torch.randint(0, width - w, size=(1,)).item()
                 return i, j, h, w
 
         # Fallback to central crop
         in_ratio = float(width) / float(height)
-        if (in_ratio < min(ratio)):
+        if in_ratio < min(ratio):
             w = width
             h = int(round(w / min(ratio)))
-        elif (in_ratio > max(ratio)):
+        elif in_ratio > max(ratio):
             h = height
             w = int(round(h * max(ratio)))
         else:  # whole image
@@ -754,13 +768,13 @@ def get_params(img, scale, ratio):
             j = (width - w) // 2
         return i, j, h, w
 
-    def __call__(self, img):
+    def forward(self, img):
         """
         Args:
-            img (PIL Image): Image to be cropped and resized.
+            img (PIL Image or Tensor): Image to be cropped and resized.
 
         Returns:
-            PIL Image: Randomly cropped and resized image.
+            PIL Image or Tensor: Randomly cropped and resized image.
         """
         i, j, h, w = self.get_params(img, self.scale, self.ratio)
         return F.resized_crop(img, i, j, h, w, self.size, self.interpolation)
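Note on PATCH 2/6 (an illustrative sketch, not part of the patch series): the switch from the `random` module to `torch.empty(1).uniform_()` / `torch.randint()` is what makes `get_params` TorchScript-compatible and puts the sampling under `torch.manual_seed`. A standalone sketch of the sampling step; the helper name is illustrative, not taken from the patch:

    import math
    import torch

    def sample_crop_hw(height, width, scale, ratio):
        # Draw a target area, then a log-uniform aspect ratio, as the
        # patched get_params does on each of its 10 attempts.
        area = height * width
        target_area = area * torch.empty(1).uniform_(scale[0], scale[1]).item()
        log_ratio = torch.log(torch.tensor(ratio))
        aspect_ratio = torch.exp(
            torch.empty(1).uniform_(float(log_ratio[0]), float(log_ratio[1]))
        ).item()
        w = int(round(math.sqrt(target_area * aspect_ratio)))
        h = int(round(math.sqrt(target_area / aspect_ratio)))
        return h, w

    torch.manual_seed(12)
    print(sample_crop_hw(44, 56, scale=(0.7, 1.2), ratio=(0.75, 1.333)))

Sampling the aspect ratio in log space keeps a ratio r and its inverse 1/r equally likely.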
From 72b3757ced8ca194173f562d502ae67a3757fdab Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Mon, 6 Jul 2020 16:02:36 +0200
Subject: [PATCH 3/6] Fixed bugs and updated test

---
 test/test_transforms_tensor.py       | 2 ++
 torchvision/transforms/transforms.py | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/test/test_transforms_tensor.py b/test/test_transforms_tensor.py
index 210c1305872..fbd3331a490 100644
--- a/test/test_transforms_tensor.py
+++ b/test/test_transforms_tensor.py
@@ -258,7 +258,9 @@ def test_resized_crop(self):
                 )
                 s_transform = torch.jit.script(transform)
 
+                torch.manual_seed(12)
                 out1 = transform(tensor)
+                torch.manual_seed(12)
                 out2 = s_transform(tensor)
                 self.assertTrue(out1.equal(out2))
 
diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py
index c877e4ff84b..44ac38b5f43 100644
--- a/torchvision/transforms/transforms.py
+++ b/torchvision/transforms/transforms.py
@@ -715,6 +715,7 @@ def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), interpolat
         else:
             if len(size) != 2:
                 raise ValueError("Please provide only two dimensions (h, w) for size.")
+            self.size = size
 
         if not isinstance(scale, (tuple, list)):
             raise TypeError("Scale should be a sequence")
@@ -755,7 +756,7 @@ def get_params(
             w = int(round(math.sqrt(target_area * aspect_ratio)))
             h = int(round(math.sqrt(target_area / aspect_ratio)))
 
-            if 0 < w <= width and 0 < h <= height:
+            if 0 < w < width and 0 < h < height:
                 i = torch.randint(0, height - h, size=(1,)).item()
                 j = torch.randint(0, width - w, size=(1,)).item()
                 return i, j, h, w
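Note on PATCH 3/6 (an illustrative sketch, not part of the patch series): the added `torch.manual_seed(12)` calls are what make the eager-vs-scripted comparison meaningful. Both modules draw their crop parameters from torch's global RNG, so each call must start from the same seed to produce the same crop. A minimal sketch, assuming the patched torchvision:

    import torch
    from torchvision import transforms as T

    transform = T.RandomResizedCrop(size=32)
    img = torch.randint(0, 256, (3, 44, 56), dtype=torch.uint8)

    torch.manual_seed(12)
    out1 = transform(img)
    torch.manual_seed(12)
    out2 = transform(img)  # same seed, same (i, j, h, w), same output
    assert out1.equal(out2)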
""" i, j, h, w = self.get_params(img, self.scale, self.ratio) return F.resized_crop(img, i, j, h, w, self.size, self.interpolation) From 72b3757ced8ca194173f562d502ae67a3757fdab Mon Sep 17 00:00:00 2001 From: vfdev-5 Date: Mon, 6 Jul 2020 16:02:36 +0200 Subject: [PATCH 3/6] Fixed bugs and updated test --- test/test_transforms_tensor.py | 2 ++ torchvision/transforms/transforms.py | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/test/test_transforms_tensor.py b/test/test_transforms_tensor.py index 210c1305872..fbd3331a490 100644 --- a/test/test_transforms_tensor.py +++ b/test/test_transforms_tensor.py @@ -258,7 +258,9 @@ def test_resized_crop(self): ) s_transform = torch.jit.script(transform) + torch.manual_seed(12) out1 = transform(tensor) + torch.manual_seed(12) out2 = s_transform(tensor) self.assertTrue(out1.equal(out2)) diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py index c877e4ff84b..44ac38b5f43 100644 --- a/torchvision/transforms/transforms.py +++ b/torchvision/transforms/transforms.py @@ -715,6 +715,7 @@ def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), interpolat else: if len(size) != 2: raise ValueError("Please provide only two dimensions (h, w) for size.") + self.size = size if not isinstance(scale, (tuple, list)): raise TypeError("Scale should be a sequence") @@ -755,7 +756,7 @@ def get_params( w = int(round(math.sqrt(target_area * aspect_ratio))) h = int(round(math.sqrt(target_area / aspect_ratio))) - if 0 < w <= width and 0 < h <= height: + if 0 < w < width and 0 < h < height: i = torch.randint(0, height - h, size=(1,)).item() j = torch.randint(0, width - w, size=(1,)).item() return i, j, h, w From 6d603b40f77a20aa37e1b666d1d96e7c9ee1706b Mon Sep 17 00:00:00 2001 From: vfdev-5 Date: Tue, 7 Jul 2020 11:19:16 +0200 Subject: [PATCH 4/6] Added resized crop functional test - fixed bug with size convention --- test/test_functional_tensor.py | 17 +++++++++++++++++ torchvision/transforms/functional_tensor.py | 2 +- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/test/test_functional_tensor.py b/test/test_functional_tensor.py index cd3ae5a0a82..95f7383a4f7 100644 --- a/test/test_functional_tensor.py +++ b/test/test_functional_tensor.py @@ -331,6 +331,23 @@ def test_resize(self): pad_tensor_script = script_fn(tensor, size=script_size, interpolation=interpolation) self.assertTrue(resized_tensor.equal(pad_tensor_script), msg="{}, {}".format(size, interpolation)) + def test_resized_crop(self): + # test values of F.resized_crop in several cases: + # 1) resize to the same size, crop to the same size => should be identity + tensor, _ = self._create_data(26, 36) + for i in [0, 2, 3]: + out_tensor = F.resized_crop(tensor, top=0, left=0, height=26, width=36, size=[26, 36], interpolation=i) + self.assertTrue(tensor.equal(out_tensor), msg="{} vs {}".format(out_tensor[0, :5, :5], tensor[0, :5, :5])) + + # 2) resize by half and crop a TL corner + tensor, _ = self._create_data(26, 36) + out_tensor = F.resized_crop(tensor, top=0, left=0, height=20, width=30, size=[10, 15], interpolation=0) + expected_out_tensor = tensor[:, :20:2, :30:2] + self.assertTrue( + expected_out_tensor.equal(out_tensor), + msg="{} vs {}".format(expected_out_tensor[0, :10, :10], out_tensor[0, :10, :10]) + ) + if __name__ == '__main__': unittest.main() diff --git a/torchvision/transforms/functional_tensor.py b/torchvision/transforms/functional_tensor.py index be0b7b3a622..59cf6bc2764 100644 --- 
From 08caaedbee810c704d0be8e6dd5108588c91669f Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Tue, 7 Jul 2020 12:58:35 +0200
Subject: [PATCH 5/6] Fixed incoherent sampling

---
 torchvision/transforms/transforms.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py
index 44ac38b5f43..b17a7bda6aa 100644
--- a/torchvision/transforms/transforms.py
+++ b/torchvision/transforms/transforms.py
@@ -756,9 +756,9 @@ def get_params(
             w = int(round(math.sqrt(target_area * aspect_ratio)))
             h = int(round(math.sqrt(target_area / aspect_ratio)))
 
-            if 0 < w < width and 0 < h < height:
-                i = torch.randint(0, height - h, size=(1,)).item()
-                j = torch.randint(0, width - w, size=(1,)).item()
+            if 0 < w <= width and 0 < h <= height:
+                i = torch.randint(0, max(height - h, 1), size=(1,)).item()
+                j = torch.randint(0, max(width - w, 1), size=(1,)).item()
                 return i, j, h, w
 
         # Fallback to central crop

From d7ed08f4926dff934cc627c8edfc3b068dffb6ce Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Tue, 7 Jul 2020 15:46:34 +0200
Subject: [PATCH 6/6] Fixed torch randint review remark

---
 torchvision/transforms/transforms.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py
index b17a7bda6aa..2df2befcb33 100644
--- a/torchvision/transforms/transforms.py
+++ b/torchvision/transforms/transforms.py
@@ -757,8 +757,8 @@ def get_params(
             h = int(round(math.sqrt(target_area / aspect_ratio)))
 
             if 0 < w <= width and 0 < h <= height:
-                i = torch.randint(0, max(height - h, 1), size=(1,)).item()
-                j = torch.randint(0, max(width - w, 1), size=(1,)).item()
+                i = torch.randint(0, height - h + 1, size=(1,)).item()
+                j = torch.randint(0, width - w + 1, size=(1,)).item()
                 return i, j, h, w
 
         # Fallback to central crop
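Note on PATCHES 5/6 and 6/6 (an illustrative sketch, not part of the patch series): with the restored `0 < w <= width` condition the crop may span the whole image, making `width - w` zero, and `torch.randint` treats its upper bound as exclusive. Patch 5 works around the empty range with `max(..., 1)`; patch 6 settles on the cleaner `+ 1`, which also lets the maximal offset actually be drawn. A minimal sketch of the boundary case:

    import torch

    height, h = 44, 44  # the sampled crop spans the full height
    # torch.randint(0, height - h, size=(1,)) would raise: empty range [0, 0)
    i = torch.randint(0, height - h + 1, size=(1,)).item()
    assert i == 0  # the only valid vertical offset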