From 1faf9edfc807a9ac5616efc78b3d888a247b0723 Mon Sep 17 00:00:00 2001
From: Nicolas Hug
Date: Thu, 4 Mar 2021 10:23:26 +0000
Subject: [PATCH 1/3] dont do this at home

---
 torchvision/transforms/convert.py        | 218 +++++++++++++++++++++++
 torchvision/transforms/functional.py     |   5 +-
 torchvision/transforms/functional_pil.py |  51 +++---
 3 files changed, 250 insertions(+), 24 deletions(-)
 create mode 100644 torchvision/transforms/convert.py

diff --git a/torchvision/transforms/convert.py b/torchvision/transforms/convert.py
new file mode 100644
index 00000000000..96ac1f1627e
--- /dev/null
+++ b/torchvision/transforms/convert.py
@@ -0,0 +1,218 @@
+import math
+import numbers
+import warnings
+from enum import Enum
+
+import numpy as np
+from PIL import Image
+
+import torch
+from torch import Tensor
+from typing import List, Tuple, Any, Optional
+
+try:
+    import accimage
+except ImportError:
+    accimage = None
+
+import numbers
+from typing import Any, List, Sequence
+
+import numpy as np
+import torch
+from PIL import Image, ImageOps, ImageEnhance, ImageFilter, __version__ as PILLOW_VERSION
+
+try:
+    import accimage
+except ImportError:
+    accimage = None
+
+
+@torch.jit.unused
+def _is_pil_image(img: Any) -> bool:
+    if accimage is not None:
+        return isinstance(img, (Image.Image, accimage.Image))
+    else:
+        return isinstance(img, Image.Image)
+
+
+def _get_image_size(img: Tensor) -> List[int]:
+    """Returns image size as [w, h]
+    """
+    if isinstance(img, torch.Tensor):
+        return F_t._get_image_size(img)
+
+    return F_pil._get_image_size(img)
+
+
+def _get_image_num_channels(img: Tensor) -> int:
+    """Returns number of image channels
+    """
+    if isinstance(img, torch.Tensor):
+        return F_t._get_image_num_channels(img)
+
+    return F_pil._get_image_num_channels(img)
+
+
+@torch.jit.unused
+def _is_numpy(img: Any) -> bool:
+    return isinstance(img, np.ndarray)
+
+
+@torch.jit.unused
+def _is_numpy_image(img: Any) -> bool:
+    return img.ndim in {2, 3}
+
+
+
+def to_tensor(pic):
+    """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.
+    This function does not support torchscript.
+
+    See :class:`~torchvision.transforms.ToTensor` for more details.
+
+    Args:
+        pic (PIL Image or numpy.ndarray): Image to be converted to tensor.
+
+    Returns:
+        Tensor: Converted image.
+    """
+    if not(_is_pil_image(pic) or _is_numpy(pic)):
+        raise TypeError('pic should be PIL Image or ndarray. Got {}'.format(type(pic)))
+
+    if _is_numpy(pic) and not _is_numpy_image(pic):
+        raise ValueError('pic should be 2/3 dimensional. Got {} dimensions.'.format(pic.ndim))
+
+    default_float_dtype = torch.get_default_dtype()
+
+    if isinstance(pic, np.ndarray):
+        # handle numpy array
+        if pic.ndim == 2:
+            pic = pic[:, :, None]
+
+        img = torch.from_numpy(pic.transpose((2, 0, 1))).contiguous()
+        # backward compatibility
+        if isinstance(img, torch.ByteTensor):
+            return img.to(dtype=default_float_dtype).div(255)
+        else:
+            return img
+
+    if accimage is not None and isinstance(pic, accimage.Image):
+        nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.float32)
+        pic.copyto(nppic)
+        return torch.from_numpy(nppic).to(dtype=default_float_dtype)
+
+    # handle PIL Image
+    if pic.mode == 'I':
+        img = torch.from_numpy(np.array(pic, np.int32, copy=False))
+    elif pic.mode == 'I;16':
+        img = torch.from_numpy(np.array(pic, np.int16, copy=False))
+    elif pic.mode == 'F':
+        img = torch.from_numpy(np.array(pic, np.float32, copy=False))
+    elif pic.mode == '1':
+        img = 255 * torch.from_numpy(np.array(pic, np.uint8, copy=False))
+    else:
+        img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
+
+    img = img.view(pic.size[1], pic.size[0], len(pic.getbands()))
+    # put it from HWC to CHW format
+    img = img.permute((2, 0, 1)).contiguous()
+    if isinstance(img, torch.ByteTensor):
+        return img.to(dtype=default_float_dtype).div(255)
+    else:
+        return img
+
+
+def to_pil_image(pic, mode=None):
+    """Convert a tensor or an ndarray to PIL Image. This function does not support torchscript.
+
+    See :class:`~torchvision.transforms.ToPILImage` for more details.
+
+    Args:
+        pic (Tensor or numpy.ndarray): Image to be converted to PIL Image.
+        mode (`PIL.Image mode`_): color space and pixel depth of input data (optional).
+
+    .. _PIL.Image mode: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#concept-modes
+
+    Returns:
+        PIL Image: Image converted to PIL Image.
+    """
+    if not(isinstance(pic, torch.Tensor) or isinstance(pic, np.ndarray)):
+        raise TypeError('pic should be Tensor or ndarray. Got {}.'.format(type(pic)))
+
+    elif isinstance(pic, torch.Tensor):
+        if pic.ndimension() not in {2, 3}:
+            raise ValueError('pic should be 2/3 dimensional. Got {} dimensions.'.format(pic.ndimension()))
+
+        elif pic.ndimension() == 2:
+            # if 2D image, add channel dimension (CHW)
+            pic = pic.unsqueeze(0)
+
+        # check number of channels
+        if pic.shape[-3] > 4:
+            raise ValueError('pic should not have > 4 channels. Got {} channels.'.format(pic.shape[-3]))
+
+    elif isinstance(pic, np.ndarray):
+        if pic.ndim not in {2, 3}:
+            raise ValueError('pic should be 2/3 dimensional. Got {} dimensions.'.format(pic.ndim))
+
+        elif pic.ndim == 2:
+            # if 2D image, add channel dimension (HWC)
+            pic = np.expand_dims(pic, 2)
+
+        # check number of channels
+        if pic.shape[-1] > 4:
+            raise ValueError('pic should not have > 4 channels. Got {} channels.'.format(pic.shape[-1]))
+
+    npimg = pic
+    if isinstance(pic, torch.Tensor):
+        if pic.is_floating_point() and mode != 'F':
+            pic = pic.mul(255).byte()
+        npimg = np.transpose(pic.cpu().numpy(), (1, 2, 0))
+
+    if not isinstance(npimg, np.ndarray):
+        raise TypeError('Input pic must be a torch.Tensor or NumPy ndarray, ' +
+                        'not {}'.format(type(npimg)))
+
+    if npimg.shape[2] == 1:
+        expected_mode = None
+        npimg = npimg[:, :, 0]
+        if npimg.dtype == np.uint8:
+            expected_mode = 'L'
+        elif npimg.dtype == np.int16:
+            expected_mode = 'I;16'
+        elif npimg.dtype == np.int32:
+            expected_mode = 'I'
+        elif npimg.dtype == np.float32:
+            expected_mode = 'F'
+        if mode is not None and mode != expected_mode:
+            raise ValueError("Incorrect mode ({}) supplied for input type {}. Should be {}"
+                             .format(mode, np.dtype, expected_mode))
+        mode = expected_mode
+
+    elif npimg.shape[2] == 2:
+        permitted_2_channel_modes = ['LA']
+        if mode is not None and mode not in permitted_2_channel_modes:
+            raise ValueError("Only modes {} are supported for 2D inputs".format(permitted_2_channel_modes))
+
+        if mode is None and npimg.dtype == np.uint8:
+            mode = 'LA'
+
+    elif npimg.shape[2] == 4:
+        permitted_4_channel_modes = ['RGBA', 'CMYK', 'RGBX']
+        if mode is not None and mode not in permitted_4_channel_modes:
+            raise ValueError("Only modes {} are supported for 4D inputs".format(permitted_4_channel_modes))
+
+        if mode is None and npimg.dtype == np.uint8:
+            mode = 'RGBA'
+    else:
+        permitted_3_channel_modes = ['RGB', 'YCbCr', 'HSV']
+        if mode is not None and mode not in permitted_3_channel_modes:
+            raise ValueError("Only modes {} are supported for 3D inputs".format(permitted_3_channel_modes))
+        if mode is None and npimg.dtype == np.uint8:
+            mode = 'RGB'
+
+    if mode is None:
+        raise TypeError('Input type {} is not supported'.format(npimg.dtype))
+
+    return Image.fromarray(npimg, mode=mode)
\ No newline at end of file
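For context on the two helpers copied into convert.py: to_tensor rescales uint8 pixel data into float32 values in [0, 1] (the div(255) branches above), while to_pil_image multiplies floating-point input by 255 and truncates back to uint8. A minimal round-trip sketch, assuming torchvision is importable and a uint8 RGB input; this snippet is illustrative and not part of the patch:

from PIL import Image
import torch
from torchvision.transforms.functional import to_tensor, to_pil_image

img = Image.new('RGB', (4, 4), color=(128, 64, 32))
t = to_tensor(img)               # float32, CHW, values scaled into [0, 1]
assert t.dtype == torch.float32 and tuple(t.shape) == (3, 4, 4)
back = to_pil_image(t)           # mul(255).byte() on the way back out
assert back.mode == 'RGB' and back.size == (4, 4)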
diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py
index 34ea8de6ad0..f335e8c82bb 100644
--- a/torchvision/transforms/functional.py
+++ b/torchvision/transforms/functional.py
@@ -371,8 +371,9 @@ def resize(img: Tensor, size: List[int], interpolation: InterpolationMode = Inte
         raise TypeError("Argument interpolation should be a InterpolationMode")
 
     if not isinstance(img, torch.Tensor):
-        pil_interpolation = pil_modes_mapping[interpolation]
-        return F_pil.resize(img, size=size, interpolation=pil_interpolation)
+        # pil_interpolation = pil_modes_mapping[interpolation]
+        # return F_pil.resize(img, size=size, interpolation=pil_interpolation)
+        return F_pil.resize(img, size=size, interpolation=interpolation)
 
     return F_t.resize(img, size=size, interpolation=interpolation.value)
 
diff --git a/torchvision/transforms/functional_pil.py b/torchvision/transforms/functional_pil.py
index 6999a2acf5f..2cfb5b3d963 100644
--- a/torchvision/transforms/functional_pil.py
+++ b/torchvision/transforms/functional_pil.py
@@ -203,29 +203,36 @@ def crop(img: Image.Image, top: int, left: int, height: int, width: int) -> Imag
     return img.crop((left, top, left + width, top + height))
 
 
+# def resize(img, size, interpolation=Image.BILINEAR):
 @torch.jit.unused
-def resize(img, size, interpolation=Image.BILINEAR):
-    if not _is_pil_image(img):
-        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
-    if not (isinstance(size, int) or (isinstance(size, Sequence) and len(size) in (1, 2))):
-        raise TypeError('Got inappropriate size arg: {}'.format(size))
-
-    if isinstance(size, int) or len(size) == 1:
-        if isinstance(size, Sequence):
-            size = size[0]
-        w, h = img.size
-        if (w <= h and w == size) or (h <= w and h == size):
-            return img
-        if w < h:
-            ow = size
-            oh = int(size * h / w)
-            return img.resize((ow, oh), interpolation)
-        else:
-            oh = size
-            ow = int(size * w / h)
-            return img.resize((ow, oh), interpolation)
-    else:
-        return img.resize(size[::-1], interpolation)
+def resize(img, size, interpolation='bilinear'):
+    from .functional_tensor import resize as ft_resize
+    from . import convert
+
+    t = convert.to_tensor(img)
+    resized_t = ft_resize(t, size, interpolation.value)
+    return convert.to_pil_image(resized_t)
+    # if not _is_pil_image(img):
+    #     raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+    # if not (isinstance(size, int) or (isinstance(size, Sequence) and len(size) in (1, 2))):
+    #     raise TypeError('Got inappropriate size arg: {}'.format(size))
+
+    # if isinstance(size, int) or len(size) == 1:
+    #     if isinstance(size, Sequence):
+    #         size = size[0]
+    #     w, h = img.size
+    #     if (w <= h and w == size) or (h <= w and h == size):
+    #         return img
+    #     if w < h:
+    #         ow = size
+    #         oh = int(size * h / w)
+    #         return img.resize((ow, oh), interpolation)
+    #     else:
+    #         oh = size
+    #         ow = int(size * w / h)
+    #         return img.resize((ow, oh), interpolation)
+    # else:
+    #     return img.resize(size[::-1], interpolation)
 
 
 @torch.jit.unused
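The net effect of patch 1 is that resizing a PIL image now round-trips through the tensor kernel: to_tensor, then functional_tensor.resize, then to_pil_image. Because the new F_pil.resize reads interpolation.value, callers are expected to pass an InterpolationMode enum rather than a PIL integer flag. A hedged sketch of the public entry point after this patch (the concrete sizes here are arbitrary):

from PIL import Image
from torchvision.transforms import InterpolationMode
from torchvision.transforms.functional import resize

img = Image.new('RGB', (64, 32))
out = resize(img, [16, 32], interpolation=InterpolationMode.BILINEAR)
# size is given as [h, w]; PIL reports size as (w, h)
assert out.size == (32, 16)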
From 7848e89127c6b205d491b41272aeff8a683a00be Mon Sep 17 00:00:00 2001
From: Nicolas Hug
Date: Thu, 4 Mar 2021 10:47:51 +0000
Subject: [PATCH 2/3] Actually remove file

---
 torchvision/transforms/convert.py        | 218 -----------------------
 torchvision/transforms/functional_pil.py |  28 +--
 2 files changed, 3 insertions(+), 243 deletions(-)
 delete mode 100644 torchvision/transforms/convert.py

diff --git a/torchvision/transforms/convert.py b/torchvision/transforms/convert.py
deleted file mode 100644
index 96ac1f1627e..00000000000
--- a/torchvision/transforms/convert.py
+++ /dev/null
@@ -1,218 +0,0 @@
-import math
-import numbers
-import warnings
-from enum import Enum
-
-import numpy as np
-from PIL import Image
-
-import torch
-from torch import Tensor
-from typing import List, Tuple, Any, Optional
-
-try:
-    import accimage
-except ImportError:
-    accimage = None
-
-import numbers
-from typing import Any, List, Sequence
-
-import numpy as np
-import torch
-from PIL import Image, ImageOps, ImageEnhance, ImageFilter, __version__ as PILLOW_VERSION
-
-try:
-    import accimage
-except ImportError:
-    accimage = None
-
-
-@torch.jit.unused
-def _is_pil_image(img: Any) -> bool:
-    if accimage is not None:
-        return isinstance(img, (Image.Image, accimage.Image))
-    else:
-        return isinstance(img, Image.Image)
-
-
-def _get_image_size(img: Tensor) -> List[int]:
-    """Returns image size as [w, h]
-    """
-    if isinstance(img, torch.Tensor):
-        return F_t._get_image_size(img)
-
-    return F_pil._get_image_size(img)
-
-
-def _get_image_num_channels(img: Tensor) -> int:
-    """Returns number of image channels
-    """
-    if isinstance(img, torch.Tensor):
-        return F_t._get_image_num_channels(img)
-
-    return F_pil._get_image_num_channels(img)
-
-
-@torch.jit.unused
-def _is_numpy(img: Any) -> bool:
-    return isinstance(img, np.ndarray)
-
-
-@torch.jit.unused
-def _is_numpy_image(img: Any) -> bool:
-    return img.ndim in {2, 3}
-
-
-
-def to_tensor(pic):
-    """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.
-    This function does not support torchscript.
-
-    See :class:`~torchvision.transforms.ToTensor` for more details.
-
-    Args:
-        pic (PIL Image or numpy.ndarray): Image to be converted to tensor.
-
-    Returns:
-        Tensor: Converted image.
-    """
-    if not(_is_pil_image(pic) or _is_numpy(pic)):
-        raise TypeError('pic should be PIL Image or ndarray. Got {}'.format(type(pic)))
-
-    if _is_numpy(pic) and not _is_numpy_image(pic):
-        raise ValueError('pic should be 2/3 dimensional. Got {} dimensions.'.format(pic.ndim))
-
-    default_float_dtype = torch.get_default_dtype()
-
-    if isinstance(pic, np.ndarray):
-        # handle numpy array
-        if pic.ndim == 2:
-            pic = pic[:, :, None]
-
-        img = torch.from_numpy(pic.transpose((2, 0, 1))).contiguous()
-        # backward compatibility
-        if isinstance(img, torch.ByteTensor):
-            return img.to(dtype=default_float_dtype).div(255)
-        else:
-            return img
-
-    if accimage is not None and isinstance(pic, accimage.Image):
-        nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.float32)
-        pic.copyto(nppic)
-        return torch.from_numpy(nppic).to(dtype=default_float_dtype)
-
-    # handle PIL Image
-    if pic.mode == 'I':
-        img = torch.from_numpy(np.array(pic, np.int32, copy=False))
-    elif pic.mode == 'I;16':
-        img = torch.from_numpy(np.array(pic, np.int16, copy=False))
-    elif pic.mode == 'F':
-        img = torch.from_numpy(np.array(pic, np.float32, copy=False))
-    elif pic.mode == '1':
-        img = 255 * torch.from_numpy(np.array(pic, np.uint8, copy=False))
-    else:
-        img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
-
-    img = img.view(pic.size[1], pic.size[0], len(pic.getbands()))
-    # put it from HWC to CHW format
-    img = img.permute((2, 0, 1)).contiguous()
-    if isinstance(img, torch.ByteTensor):
-        return img.to(dtype=default_float_dtype).div(255)
-    else:
-        return img
-
-
-def to_pil_image(pic, mode=None):
-    """Convert a tensor or an ndarray to PIL Image. This function does not support torchscript.
-
-    See :class:`~torchvision.transforms.ToPILImage` for more details.
-
-    Args:
-        pic (Tensor or numpy.ndarray): Image to be converted to PIL Image.
-        mode (`PIL.Image mode`_): color space and pixel depth of input data (optional).
-
-    .. _PIL.Image mode: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#concept-modes
-
-    Returns:
-        PIL Image: Image converted to PIL Image.
-    """
-    if not(isinstance(pic, torch.Tensor) or isinstance(pic, np.ndarray)):
-        raise TypeError('pic should be Tensor or ndarray. Got {}.'.format(type(pic)))
-
-    elif isinstance(pic, torch.Tensor):
-        if pic.ndimension() not in {2, 3}:
-            raise ValueError('pic should be 2/3 dimensional. Got {} dimensions.'.format(pic.ndimension()))
-
-        elif pic.ndimension() == 2:
-            # if 2D image, add channel dimension (CHW)
-            pic = pic.unsqueeze(0)
-
-        # check number of channels
-        if pic.shape[-3] > 4:
-            raise ValueError('pic should not have > 4 channels. Got {} channels.'.format(pic.shape[-3]))
-
-    elif isinstance(pic, np.ndarray):
-        if pic.ndim not in {2, 3}:
-            raise ValueError('pic should be 2/3 dimensional. Got {} dimensions.'.format(pic.ndim))
-
-        elif pic.ndim == 2:
-            # if 2D image, add channel dimension (HWC)
-            pic = np.expand_dims(pic, 2)
-
-        # check number of channels
-        if pic.shape[-1] > 4:
-            raise ValueError('pic should not have > 4 channels. Got {} channels.'.format(pic.shape[-1]))
-
-    npimg = pic
-    if isinstance(pic, torch.Tensor):
-        if pic.is_floating_point() and mode != 'F':
-            pic = pic.mul(255).byte()
-        npimg = np.transpose(pic.cpu().numpy(), (1, 2, 0))
-
-    if not isinstance(npimg, np.ndarray):
-        raise TypeError('Input pic must be a torch.Tensor or NumPy ndarray, ' +
-                        'not {}'.format(type(npimg)))
-
-    if npimg.shape[2] == 1:
-        expected_mode = None
-        npimg = npimg[:, :, 0]
-        if npimg.dtype == np.uint8:
-            expected_mode = 'L'
-        elif npimg.dtype == np.int16:
-            expected_mode = 'I;16'
-        elif npimg.dtype == np.int32:
-            expected_mode = 'I'
-        elif npimg.dtype == np.float32:
-            expected_mode = 'F'
-        if mode is not None and mode != expected_mode:
-            raise ValueError("Incorrect mode ({}) supplied for input type {}. Should be {}"
-                             .format(mode, np.dtype, expected_mode))
-        mode = expected_mode
-
-    elif npimg.shape[2] == 2:
-        permitted_2_channel_modes = ['LA']
-        if mode is not None and mode not in permitted_2_channel_modes:
-            raise ValueError("Only modes {} are supported for 2D inputs".format(permitted_2_channel_modes))
-
-        if mode is None and npimg.dtype == np.uint8:
-            mode = 'LA'
-
-    elif npimg.shape[2] == 4:
-        permitted_4_channel_modes = ['RGBA', 'CMYK', 'RGBX']
-        if mode is not None and mode not in permitted_4_channel_modes:
-            raise ValueError("Only modes {} are supported for 4D inputs".format(permitted_4_channel_modes))
-
-        if mode is None and npimg.dtype == np.uint8:
-            mode = 'RGBA'
-    else:
-        permitted_3_channel_modes = ['RGB', 'YCbCr', 'HSV']
-        if mode is not None and mode not in permitted_3_channel_modes:
-            raise ValueError("Only modes {} are supported for 3D inputs".format(permitted_3_channel_modes))
-        if mode is None and npimg.dtype == np.uint8:
-            mode = 'RGB'
-
-    if mode is None:
-        raise TypeError('Input type {} is not supported'.format(npimg.dtype))
-
-    return Image.fromarray(npimg, mode=mode)
\ No newline at end of file
diff --git a/torchvision/transforms/functional_pil.py b/torchvision/transforms/functional_pil.py
index 2cfb5b3d963..1659d6f031a 100644
--- a/torchvision/transforms/functional_pil.py
+++ b/torchvision/transforms/functional_pil.py
@@ -203,36 +203,14 @@ def crop(img: Image.Image, top: int, left: int, height: int, width: int) -> Imag
     return img.crop((left, top, left + width, top + height))
 
 
-# def resize(img, size, interpolation=Image.BILINEAR):
 @torch.jit.unused
 def resize(img, size, interpolation='bilinear'):
     from .functional_tensor import resize as ft_resize
-    from . import convert
+    from .functional import to_tensor, to_pil_image
 
-    t = convert.to_tensor(img)
+    t = to_tensor(img)
     resized_t = ft_resize(t, size, interpolation.value)
-    return convert.to_pil_image(resized_t)
-    # if not _is_pil_image(img):
-    #     raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
-    # if not (isinstance(size, int) or (isinstance(size, Sequence) and len(size) in (1, 2))):
-    #     raise TypeError('Got inappropriate size arg: {}'.format(size))
-
-    # if isinstance(size, int) or len(size) == 1:
-    #     if isinstance(size, Sequence):
-    #         size = size[0]
-    #     w, h = img.size
-    #     if (w <= h and w == size) or (h <= w and h == size):
-    #         return img
-    #     if w < h:
-    #         ow = size
-    #         oh = int(size * h / w)
-    #         return img.resize((ow, oh), interpolation)
-    #     else:
-    #         oh = size
-    #         ow = int(size * w / h)
-    #         return img.resize((ow, oh), interpolation)
-    # else:
-    #     return img.resize(size[::-1], interpolation)
+    return to_pil_image(resized_t)
 
 
 @torch.jit.unused
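Patch 2 drops the duplicated convert.py and pulls the helpers from functional.py instead. The import stays inside the function body because functional.py imports functional_pil at module load, so a top-level 'from .functional import ...' here would be circular; deferring it to call time breaks the cycle. One cost remains: to_tensor converts uint8 pixels to float32 in [0, 1], so every PIL resize now pays a uint8 -> float32 -> uint8 round trip around the interpolation itself. A small sketch of that intermediate (illustrative, not part of the patch):

from PIL import Image
from torchvision.transforms.functional import to_tensor

t = to_tensor(Image.new('RGB', (8, 8), color=(255, 0, 0)))
print(t.dtype)                          # torch.float32
print(t.min().item(), t.max().item())   # 0.0 1.0, i.e. rescaled values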
From f101cfd016b04dc39130f94cf78251bb0c016552 Mon Sep 17 00:00:00 2001
From: Nicolas Hug
Date: Mon, 15 Mar 2021 09:58:59 +0000
Subject: [PATCH 3/3] Use pil_to_tensor instead of to_tensor

---
 torchvision/transforms/functional_pil.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/torchvision/transforms/functional_pil.py b/torchvision/transforms/functional_pil.py
index 1659d6f031a..7aaf4c2cd8a 100644
--- a/torchvision/transforms/functional_pil.py
+++ b/torchvision/transforms/functional_pil.py
@@ -206,9 +206,9 @@ def crop(img: Image.Image, top: int, left: int, height: int, width: int) -> Imag
 @torch.jit.unused
 def resize(img, size, interpolation='bilinear'):
     from .functional_tensor import resize as ft_resize
-    from .functional import to_tensor, to_pil_image
+    from .functional import pil_to_tensor, to_pil_image
 
-    t = to_tensor(img)
+    t = pil_to_tensor(img)
     resized_t = ft_resize(t, size, interpolation.value)
     return to_pil_image(resized_t)
 
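Patch 3 swaps to_tensor for pil_to_tensor, which copies the PIL data into a tensor without changing dtype or scale: a uint8 image stays uint8 with its raw pixel values, so the tensor resize kernel and to_pil_image see the same value range a direct PIL resize would, and no 255x rescaling happens on either side. A quick comparison of the two helpers (illustrative sketch, assuming a torchvision recent enough to ship pil_to_tensor):

from PIL import Image
import torch
from torchvision.transforms.functional import to_tensor, pil_to_tensor

img = Image.new('RGB', (8, 8), color=(200, 100, 50))
assert to_tensor(img).dtype == torch.float32      # rescaled into [0, 1]
assert pil_to_tensor(img).dtype == torch.uint8    # raw pixel values, CHW layout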