From 1faf9edfc807a9ac5616efc78b3d888a247b0723 Mon Sep 17 00:00:00 2001
From: Nicolas Hug
Date: Thu, 4 Mar 2021 10:23:26 +0000
Subject: [PATCH 1/3] dont do this at home

---
 torchvision/transforms/convert.py        | 218 +++++++++++++++++++++++
 torchvision/transforms/functional.py     |   5 +-
 torchvision/transforms/functional_pil.py |  51 +++---
 3 files changed, 250 insertions(+), 24 deletions(-)
 create mode 100644 torchvision/transforms/convert.py

diff --git a/torchvision/transforms/convert.py b/torchvision/transforms/convert.py
new file mode 100644
index 00000000000..96ac1f1627e
--- /dev/null
+++ b/torchvision/transforms/convert.py
@@ -0,0 +1,218 @@
+import math
+import numbers
+import warnings
+from enum import Enum
+
+import numpy as np
+from PIL import Image
+
+import torch
+from torch import Tensor
+from typing import List, Tuple, Any, Optional
+
+try:
+    import accimage
+except ImportError:
+    accimage = None
+
+import numbers
+from typing import Any, List, Sequence
+
+import numpy as np
+import torch
+from PIL import Image, ImageOps, ImageEnhance, ImageFilter, __version__ as PILLOW_VERSION
+
+try:
+    import accimage
+except ImportError:
+    accimage = None
+
+
+@torch.jit.unused
+def _is_pil_image(img: Any) -> bool:
+    if accimage is not None:
+        return isinstance(img, (Image.Image, accimage.Image))
+    else:
+        return isinstance(img, Image.Image)
+
+
+def _get_image_size(img: Tensor) -> List[int]:
+    """Returns image size as [w, h]
+    """
+    if isinstance(img, torch.Tensor):
+        return F_t._get_image_size(img)
+
+    return F_pil._get_image_size(img)
+
+
+def _get_image_num_channels(img: Tensor) -> int:
+    """Returns number of image channels
+    """
+    if isinstance(img, torch.Tensor):
+        return F_t._get_image_num_channels(img)
+
+    return F_pil._get_image_num_channels(img)
+
+
+@torch.jit.unused
+def _is_numpy(img: Any) -> bool:
+    return isinstance(img, np.ndarray)
+
+
+@torch.jit.unused
+def _is_numpy_image(img: Any) -> bool:
+    return img.ndim in {2, 3}
+
+
+
+def to_tensor(pic):
+    """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.
+    This function does not support torchscript.
+
+    See :class:`~torchvision.transforms.ToTensor` for more details.
+
+    Args:
+        pic (PIL Image or numpy.ndarray): Image to be converted to tensor.
+
+    Returns:
+        Tensor: Converted image.
+    """
+    if not(_is_pil_image(pic) or _is_numpy(pic)):
+        raise TypeError('pic should be PIL Image or ndarray. Got {}'.format(type(pic)))
+
+    if _is_numpy(pic) and not _is_numpy_image(pic):
+        raise ValueError('pic should be 2/3 dimensional. Got {} dimensions.'.format(pic.ndim))
+
+    default_float_dtype = torch.get_default_dtype()
+
+    if isinstance(pic, np.ndarray):
+        # handle numpy array
+        if pic.ndim == 2:
+            pic = pic[:, :, None]
+
+        img = torch.from_numpy(pic.transpose((2, 0, 1))).contiguous()
+        # backward compatibility
+        if isinstance(img, torch.ByteTensor):
+            return img.to(dtype=default_float_dtype).div(255)
+        else:
+            return img
+
+    if accimage is not None and isinstance(pic, accimage.Image):
+        nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.float32)
+        pic.copyto(nppic)
+        return torch.from_numpy(nppic).to(dtype=default_float_dtype)
+
+    # handle PIL Image
+    if pic.mode == 'I':
+        img = torch.from_numpy(np.array(pic, np.int32, copy=False))
+    elif pic.mode == 'I;16':
+        img = torch.from_numpy(np.array(pic, np.int16, copy=False))
+    elif pic.mode == 'F':
+        img = torch.from_numpy(np.array(pic, np.float32, copy=False))
+    elif pic.mode == '1':
+        img = 255 * torch.from_numpy(np.array(pic, np.uint8, copy=False))
+    else:
+        img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
+
+    img = img.view(pic.size[1], pic.size[0], len(pic.getbands()))
+    # put it from HWC to CHW format
+    img = img.permute((2, 0, 1)).contiguous()
+    if isinstance(img, torch.ByteTensor):
+        return img.to(dtype=default_float_dtype).div(255)
+    else:
+        return img
+
+
+def to_pil_image(pic, mode=None):
+    """Convert a tensor or an ndarray to PIL Image. This function does not support torchscript.
+
+    See :class:`~torchvision.transforms.ToPILImage` for more details.
+
+    Args:
+        pic (Tensor or numpy.ndarray): Image to be converted to PIL Image.
+        mode (`PIL.Image mode`_): color space and pixel depth of input data (optional).
+
+    .. _PIL.Image mode: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#concept-modes
+
+    Returns:
+        PIL Image: Image converted to PIL Image.
+    """
+    if not(isinstance(pic, torch.Tensor) or isinstance(pic, np.ndarray)):
+        raise TypeError('pic should be Tensor or ndarray. Got {}.'.format(type(pic)))
+
+    elif isinstance(pic, torch.Tensor):
+        if pic.ndimension() not in {2, 3}:
+            raise ValueError('pic should be 2/3 dimensional. Got {} dimensions.'.format(pic.ndimension()))
+
+        elif pic.ndimension() == 2:
+            # if 2D image, add channel dimension (CHW)
+            pic = pic.unsqueeze(0)
+
+        # check number of channels
+        if pic.shape[-3] > 4:
+            raise ValueError('pic should not have > 4 channels. Got {} channels.'.format(pic.shape[-3]))
+
+    elif isinstance(pic, np.ndarray):
+        if pic.ndim not in {2, 3}:
+            raise ValueError('pic should be 2/3 dimensional. Got {} dimensions.'.format(pic.ndim))
+
+        elif pic.ndim == 2:
+            # if 2D image, add channel dimension (HWC)
+            pic = np.expand_dims(pic, 2)
+
+        # check number of channels
+        if pic.shape[-1] > 4:
+            raise ValueError('pic should not have > 4 channels. Got {} channels.'.format(pic.shape[-1]))
+
+    npimg = pic
+    if isinstance(pic, torch.Tensor):
+        if pic.is_floating_point() and mode != 'F':
+            pic = pic.mul(255).byte()
+        npimg = np.transpose(pic.cpu().numpy(), (1, 2, 0))
+
+    if not isinstance(npimg, np.ndarray):
+        raise TypeError('Input pic must be a torch.Tensor or NumPy ndarray, ' +
+                        'not {}'.format(type(npimg)))
+
+    if npimg.shape[2] == 1:
+        expected_mode = None
+        npimg = npimg[:, :, 0]
+        if npimg.dtype == np.uint8:
+            expected_mode = 'L'
+        elif npimg.dtype == np.int16:
+            expected_mode = 'I;16'
+        elif npimg.dtype == np.int32:
+            expected_mode = 'I'
+        elif npimg.dtype == np.float32:
+            expected_mode = 'F'
+        if mode is not None and mode != expected_mode:
+            raise ValueError("Incorrect mode ({}) supplied for input type {}. Should be {}"
+                             .format(mode, np.dtype, expected_mode))
+        mode = expected_mode
+
+    elif npimg.shape[2] == 2:
+        permitted_2_channel_modes = ['LA']
+        if mode is not None and mode not in permitted_2_channel_modes:
+            raise ValueError("Only modes {} are supported for 2D inputs".format(permitted_2_channel_modes))
+
+        if mode is None and npimg.dtype == np.uint8:
+            mode = 'LA'
+
+    elif npimg.shape[2] == 4:
+        permitted_4_channel_modes = ['RGBA', 'CMYK', 'RGBX']
+        if mode is not None and mode not in permitted_4_channel_modes:
+            raise ValueError("Only modes {} are supported for 4D inputs".format(permitted_4_channel_modes))
+
+        if mode is None and npimg.dtype == np.uint8:
+            mode = 'RGBA'
+    else:
+        permitted_3_channel_modes = ['RGB', 'YCbCr', 'HSV']
+        if mode is not None and mode not in permitted_3_channel_modes:
+            raise ValueError("Only modes {} are supported for 3D inputs".format(permitted_3_channel_modes))
+        if mode is None and npimg.dtype == np.uint8:
+            mode = 'RGB'
+
+    if mode is None:
+        raise TypeError('Input type {} is not supported'.format(npimg.dtype))
+
+    return Image.fromarray(npimg, mode=mode)
\ No newline at end of file
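For context on the two helpers copied into convert.py: to_tensor rescales uint8 pixel data into float32 values in [0, 1] (the div(255) branches above), while to_pil_image multiplies floating-point input by 255 and truncates back to uint8. A minimal round-trip sketch, assuming torchvision is importable and a uint8 RGB input; this snippet is illustrative and not part of the patch:

from PIL import Image
import torch
from torchvision.transforms.functional import to_tensor, to_pil_image

img = Image.new('RGB', (4, 4), color=(128, 64, 32))
t = to_tensor(img)               # float32, CHW, values scaled into [0, 1]
assert t.dtype == torch.float32 and tuple(t.shape) == (3, 4, 4)
back = to_pil_image(t)           # mul(255).byte() on the way back out
assert back.mode == 'RGB' and back.size == (4, 4)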
diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py
index 34ea8de6ad0..f335e8c82bb 100644
--- a/torchvision/transforms/functional.py
+++ b/torchvision/transforms/functional.py
@@ -371,8 +371,9 @@ def resize(img: Tensor, size: List[int], interpolation: InterpolationMode = Inte
         raise TypeError("Argument interpolation should be a InterpolationMode")
 
     if not isinstance(img, torch.Tensor):
-        pil_interpolation = pil_modes_mapping[interpolation]
-        return F_pil.resize(img, size=size, interpolation=pil_interpolation)
+        # pil_interpolation = pil_modes_mapping[interpolation]
+        # return F_pil.resize(img, size=size, interpolation=pil_interpolation)
+        return F_pil.resize(img, size=size, interpolation=interpolation)
 
     return F_t.resize(img, size=size, interpolation=interpolation.value)
 
diff --git a/torchvision/transforms/functional_pil.py b/torchvision/transforms/functional_pil.py
index 6999a2acf5f..2cfb5b3d963 100644
--- a/torchvision/transforms/functional_pil.py
+++ b/torchvision/transforms/functional_pil.py
@@ -203,29 +203,36 @@ def crop(img: Image.Image, top: int, left: int, height: int, width: int) -> Imag
     return img.crop((left, top, left + width, top + height))
 
 
+# def resize(img, size, interpolation=Image.BILINEAR):
 @torch.jit.unused
-def resize(img, size, interpolation=Image.BILINEAR):
-    if not _is_pil_image(img):
-        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
-    if not (isinstance(size, int) or (isinstance(size, Sequence) and len(size) in (1, 2))):
-        raise TypeError('Got inappropriate size arg: {}'.format(size))
-
-    if isinstance(size, int) or len(size) == 1:
-        if isinstance(size, Sequence):
-            size = size[0]
-        w, h = img.size
-        if (w <= h and w == size) or (h <= w and h == size):
-            return img
-        if w < h:
-            ow = size
-            oh = int(size * h / w)
-            return img.resize((ow, oh), interpolation)
-        else:
-            oh = size
-            ow = int(size * w / h)
-            return img.resize((ow, oh), interpolation)
-    else:
-        return img.resize(size[::-1], interpolation)
+def resize(img, size, interpolation='bilinear'):
+    from .functional_tensor import resize as ft_resize
+    from . import convert
+
+    t = convert.to_tensor(img)
+    resized_t = ft_resize(t, size, interpolation.value)
+    return convert.to_pil_image(resized_t)
+    # if not _is_pil_image(img):
+    #     raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+    # if not (isinstance(size, int) or (isinstance(size, Sequence) and len(size) in (1, 2))):
+    #     raise TypeError('Got inappropriate size arg: {}'.format(size))
+
+    # if isinstance(size, int) or len(size) == 1:
+    #     if isinstance(size, Sequence):
+    #         size = size[0]
+    #     w, h = img.size
+    #     if (w <= h and w == size) or (h <= w and h == size):
+    #         return img
+    #     if w < h:
+    #         ow = size
+    #         oh = int(size * h / w)
+    #         return img.resize((ow, oh), interpolation)
+    #     else:
+    #         oh = size
+    #         ow = int(size * w / h)
+    #         return img.resize((ow, oh), interpolation)
+    # else:
+    #     return img.resize(size[::-1], interpolation)
 
 
 @torch.jit.unused
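The net effect of patch 1 is that resizing a PIL image now round-trips through the tensor kernel: to_tensor, then functional_tensor.resize, then to_pil_image. Because the new F_pil.resize reads interpolation.value, callers are expected to pass an InterpolationMode enum rather than a PIL integer flag. A hedged sketch of the public entry point after this patch (the concrete sizes here are arbitrary):

from PIL import Image
from torchvision.transforms import InterpolationMode
from torchvision.transforms.functional import resize

img = Image.new('RGB', (64, 32))
out = resize(img, [16, 32], interpolation=InterpolationMode.BILINEAR)
# size is given as [h, w]; PIL reports size as (w, h)
assert out.size == (32, 16)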
From 7848e89127c6b205d491b41272aeff8a683a00be Mon Sep 17 00:00:00 2001
From: Nicolas Hug
Date: Thu, 4 Mar 2021 10:47:51 +0000
Subject: [PATCH 2/3] Actually remove file

---
 torchvision/transforms/convert.py        | 218 -----------------------
 torchvision/transforms/functional_pil.py |  28 +--
 2 files changed, 3 insertions(+), 243 deletions(-)
 delete mode 100644 torchvision/transforms/convert.py

diff --git a/torchvision/transforms/convert.py b/torchvision/transforms/convert.py
deleted file mode 100644
index 96ac1f1627e..00000000000
--- a/torchvision/transforms/convert.py
+++ /dev/null
@@ -1,218 +0,0 @@
-import math
-import numbers
-import warnings
-from enum import Enum
-
-import numpy as np
-from PIL import Image
-
-import torch
-from torch import Tensor
-from typing import List, Tuple, Any, Optional
-
-try:
-    import accimage
-except ImportError:
-    accimage = None
-
-import numbers
-from typing import Any, List, Sequence
-
-import numpy as np
-import torch
-from PIL import Image, ImageOps, ImageEnhance, ImageFilter, __version__ as PILLOW_VERSION
-
-try:
-    import accimage
-except ImportError:
-    accimage = None
-
-
-@torch.jit.unused
-def _is_pil_image(img: Any) -> bool:
-    if accimage is not None:
-        return isinstance(img, (Image.Image, accimage.Image))
-    else:
-        return isinstance(img, Image.Image)
-
-
-def _get_image_size(img: Tensor) -> List[int]:
-    """Returns image size as [w, h]
-    """
-    if isinstance(img, torch.Tensor):
-        return F_t._get_image_size(img)
-
-    return F_pil._get_image_size(img)
-
-
-def _get_image_num_channels(img: Tensor) -> int:
-    """Returns number of image channels
-    """
-    if isinstance(img, torch.Tensor):
-        return F_t._get_image_num_channels(img)
-
-    return F_pil._get_image_num_channels(img)
-
-
-@torch.jit.unused
-def _is_numpy(img: Any) -> bool:
-    return isinstance(img, np.ndarray)
-
-
-@torch.jit.unused
-def _is_numpy_image(img: Any) -> bool:
-    return img.ndim in {2, 3}
-
-
-
-def to_tensor(pic):
-    """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.
-    This function does not support torchscript.
-
-    See :class:`~torchvision.transforms.ToTensor` for more details.
-
-    Args:
-        pic (PIL Image or numpy.ndarray): Image to be converted to tensor.
-
-    Returns:
-        Tensor: Converted image.
-    """
-    if not(_is_pil_image(pic) or _is_numpy(pic)):
-        raise TypeError('pic should be PIL Image or ndarray. Got {}'.format(type(pic)))
-
-    if _is_numpy(pic) and not _is_numpy_image(pic):
-        raise ValueError('pic should be 2/3 dimensional. Got {} dimensions.'.format(pic.ndim))
-
-    default_float_dtype = torch.get_default_dtype()
-
-    if isinstance(pic, np.ndarray):
-        # handle numpy array
-        if pic.ndim == 2:
-            pic = pic[:, :, None]
-
-        img = torch.from_numpy(pic.transpose((2, 0, 1))).contiguous()
-        # backward compatibility
-        if isinstance(img, torch.ByteTensor):
-            return img.to(dtype=default_float_dtype).div(255)
-        else:
-            return img
-
-    if accimage is not None and isinstance(pic, accimage.Image):
-        nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.float32)
-        pic.copyto(nppic)
-        return torch.from_numpy(nppic).to(dtype=default_float_dtype)
-
-    # handle PIL Image
-    if pic.mode == 'I':
-        img = torch.from_numpy(np.array(pic, np.int32, copy=False))
-    elif pic.mode == 'I;16':
-        img = torch.from_numpy(np.array(pic, np.int16, copy=False))
-    elif pic.mode == 'F':
-        img = torch.from_numpy(np.array(pic, np.float32, copy=False))
-    elif pic.mode == '1':
-        img = 255 * torch.from_numpy(np.array(pic, np.uint8, copy=False))
-    else:
-        img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
-
-    img = img.view(pic.size[1], pic.size[0], len(pic.getbands()))
-    # put it from HWC to CHW format
-    img = img.permute((2, 0, 1)).contiguous()
-    if isinstance(img, torch.ByteTensor):
-        return img.to(dtype=default_float_dtype).div(255)
-    else:
-        return img
-
-
-def to_pil_image(pic, mode=None):
-    """Convert a tensor or an ndarray to PIL Image. This function does not support torchscript.
-
-    See :class:`~torchvision.transforms.ToPILImage` for more details.
-
-    Args:
-        pic (Tensor or numpy.ndarray): Image to be converted to PIL Image.
-        mode (`PIL.Image mode`_): color space and pixel depth of input data (optional).
-
-    .. _PIL.Image mode: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#concept-modes
-
-    Returns:
-        PIL Image: Image converted to PIL Image.
-    """
-    if not(isinstance(pic, torch.Tensor) or isinstance(pic, np.ndarray)):
-        raise TypeError('pic should be Tensor or ndarray. Got {}.'.format(type(pic)))
-
-    elif isinstance(pic, torch.Tensor):
-        if pic.ndimension() not in {2, 3}:
-            raise ValueError('pic should be 2/3 dimensional. Got {} dimensions.'.format(pic.ndimension()))
-
-        elif pic.ndimension() == 2:
-            # if 2D image, add channel dimension (CHW)
-            pic = pic.unsqueeze(0)
-
-        # check number of channels
-        if pic.shape[-3] > 4:
-            raise ValueError('pic should not have > 4 channels. Got {} channels.'.format(pic.shape[-3]))
-
-    elif isinstance(pic, np.ndarray):
-        if pic.ndim not in {2, 3}:
-            raise ValueError('pic should be 2/3 dimensional. Got {} dimensions.'.format(pic.ndim))
-
-        elif pic.ndim == 2:
-            # if 2D image, add channel dimension (HWC)
-            pic = np.expand_dims(pic, 2)
-
-        # check number of channels
-        if pic.shape[-1] > 4:
-            raise ValueError('pic should not have > 4 channels. Got {} channels.'.format(pic.shape[-1]))
-
-    npimg = pic
-    if isinstance(pic, torch.Tensor):
-        if pic.is_floating_point() and mode != 'F':
-            pic = pic.mul(255).byte()
-        npimg = np.transpose(pic.cpu().numpy(), (1, 2, 0))
-
-    if not isinstance(npimg, np.ndarray):
-        raise TypeError('Input pic must be a torch.Tensor or NumPy ndarray, ' +
-                        'not {}'.format(type(npimg)))
-
-    if npimg.shape[2] == 1:
-        expected_mode = None
-        npimg = npimg[:, :, 0]
-        if npimg.dtype == np.uint8:
-            expected_mode = 'L'
-        elif npimg.dtype == np.int16:
-            expected_mode = 'I;16'
-        elif npimg.dtype == np.int32:
-            expected_mode = 'I'
-        elif npimg.dtype == np.float32:
-            expected_mode = 'F'
-        if mode is not None and mode != expected_mode:
-            raise ValueError("Incorrect mode ({}) supplied for input type {}. Should be {}"
-                             .format(mode, np.dtype, expected_mode))
-        mode = expected_mode
-
-    elif npimg.shape[2] == 2:
-        permitted_2_channel_modes = ['LA']
-        if mode is not None and mode not in permitted_2_channel_modes:
-            raise ValueError("Only modes {} are supported for 2D inputs".format(permitted_2_channel_modes))
-
-        if mode is None and npimg.dtype == np.uint8:
-            mode = 'LA'
-
-    elif npimg.shape[2] == 4:
-        permitted_4_channel_modes = ['RGBA', 'CMYK', 'RGBX']
-        if mode is not None and mode not in permitted_4_channel_modes:
-            raise ValueError("Only modes {} are supported for 4D inputs".format(permitted_4_channel_modes))
-
-        if mode is None and npimg.dtype == np.uint8:
-            mode = 'RGBA'
-    else:
-        permitted_3_channel_modes = ['RGB', 'YCbCr', 'HSV']
-        if mode is not None and mode not in permitted_3_channel_modes:
-            raise ValueError("Only modes {} are supported for 3D inputs".format(permitted_3_channel_modes))
-        if mode is None and npimg.dtype == np.uint8:
-            mode = 'RGB'
-
-    if mode is None:
-        raise TypeError('Input type {} is not supported'.format(npimg.dtype))
-
-    return Image.fromarray(npimg, mode=mode)
\ No newline at end of file
diff --git a/torchvision/transforms/functional_pil.py b/torchvision/transforms/functional_pil.py
index 2cfb5b3d963..1659d6f031a 100644
--- a/torchvision/transforms/functional_pil.py
+++ b/torchvision/transforms/functional_pil.py
@@ -203,36 +203,14 @@ def crop(img: Image.Image, top: int, left: int, height: int, width: int) -> Imag
     return img.crop((left, top, left + width, top + height))
 
 
-# def resize(img, size, interpolation=Image.BILINEAR):
 @torch.jit.unused
 def resize(img, size, interpolation='bilinear'):
     from .functional_tensor import resize as ft_resize
-    from . import convert
+    from .functional import to_tensor, to_pil_image
 
-    t = convert.to_tensor(img)
+    t = to_tensor(img)
     resized_t = ft_resize(t, size, interpolation.value)
-    return convert.to_pil_image(resized_t)
-    # if not _is_pil_image(img):
-    #     raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
-    # if not (isinstance(size, int) or (isinstance(size, Sequence) and len(size) in (1, 2))):
-    #     raise TypeError('Got inappropriate size arg: {}'.format(size))
-
-    # if isinstance(size, int) or len(size) == 1:
-    #     if isinstance(size, Sequence):
-    #         size = size[0]
-    #     w, h = img.size
-    #     if (w <= h and w == size) or (h <= w and h == size):
-    #         return img
-    #     if w < h:
-    #         ow = size
-    #         oh = int(size * h / w)
-    #         return img.resize((ow, oh), interpolation)
-    #     else:
-    #         oh = size
-    #         ow = int(size * w / h)
-    #         return img.resize((ow, oh), interpolation)
-    # else:
-    #     return img.resize(size[::-1], interpolation)
+    return to_pil_image(resized_t)
 
 
 @torch.jit.unused
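Patch 2 drops the duplicated convert.py and pulls the helpers from functional.py instead. The import stays inside the function body because functional.py imports functional_pil at module load, so a top-level 'from .functional import ...' here would be circular; deferring it to call time breaks the cycle. One cost remains: to_tensor converts uint8 pixels to float32 in [0, 1], so every PIL resize now pays a uint8 -> float32 -> uint8 round trip around the interpolation itself. A small sketch of that intermediate (illustrative, not part of the patch):

from PIL import Image
from torchvision.transforms.functional import to_tensor

t = to_tensor(Image.new('RGB', (8, 8), color=(255, 0, 0)))
print(t.dtype)                          # torch.float32
print(t.min().item(), t.max().item())   # 0.0 1.0, i.e. rescaled values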
From f101cfd016b04dc39130f94cf78251bb0c016552 Mon Sep 17 00:00:00 2001
From: Nicolas Hug
Date: Mon, 15 Mar 2021 09:58:59 +0000
Subject: [PATCH 3/3] Use pil_to_tensor instead of to_tensor

---
 torchvision/transforms/functional_pil.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/torchvision/transforms/functional_pil.py b/torchvision/transforms/functional_pil.py
index 1659d6f031a..7aaf4c2cd8a 100644
--- a/torchvision/transforms/functional_pil.py
+++ b/torchvision/transforms/functional_pil.py
@@ -206,9 +206,9 @@ def crop(img: Image.Image, top: int, left: int, height: int, width: int) -> Imag
 @torch.jit.unused
 def resize(img, size, interpolation='bilinear'):
     from .functional_tensor import resize as ft_resize
-    from .functional import to_tensor, to_pil_image
+    from .functional import pil_to_tensor, to_pil_image
 
-    t = to_tensor(img)
+    t = pil_to_tensor(img)
     resized_t = ft_resize(t, size, interpolation.value)
     return to_pil_image(resized_t)
 
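Patch 3 swaps to_tensor for pil_to_tensor, which copies the PIL data into a tensor without changing dtype or scale: a uint8 image stays uint8 with its raw pixel values, so the tensor resize kernel and to_pil_image see the same value range a direct PIL resize would, and no 255x rescaling happens on either side. A quick comparison of the two helpers (illustrative sketch, assuming a torchvision recent enough to ship pil_to_tensor):

from PIL import Image
import torch
from torchvision.transforms.functional import to_tensor, pil_to_tensor

img = Image.new('RGB', (8, 8), color=(200, 100, 50))
assert to_tensor(img).dtype == torch.float32      # rescaled into [0, 1]
assert pil_to_tensor(img).dtype == torch.uint8    # raw pixel values, CHW layout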