# test/test_cvtransforms.py (new file)
import cvtransforms
import transforms
import unittest
import numpy as np
import torch
import cv2


class TestOpenCVTransforms(unittest.TestCase):
    """Shape and value checks for the numpy/OpenCV transforms in cvtransforms."""

    def testScale(self):
        """Scale resizes the smaller edge to `size` and keeps the aspect ratio."""
        size = 43
        w, h = 68, 54
        img = np.random.randn(h, w, 3)
        res = cvtransforms.Scale(size)(img)
        # Here h < w, so the height becomes `size` and the width is scaled by
        # size / h (truncated).  The expectation is spelled out instead of
        # being compared against `h`, which only matched by coincidence.
        self.assertEqual(res.shape[0], size)
        self.assertEqual(res.shape[1], int(float(size) * w / h))

    def testCenterCrop(self):
        """An integer size yields a square (size, size) crop, channels kept."""
        size = 43
        w, h = 68, 54
        img = np.random.randn(h, w, 3)
        res = cvtransforms.CenterCrop(size)(img)
        self.assertEqual(res.shape[0], size)
        self.assertEqual(res.shape[1], size)
        self.assertEqual(res.shape[2], 3)

    def testRandomCrop(self):
        """A (height, width) tuple yields a crop of exactly that shape."""
        w, h = 12, 10
        img = np.random.randn(h, w, 3)
        res = cvtransforms.RandomCrop((5, 7))(img)
        self.assertEqual(res.shape, (5, 7, 3))

    def testNormalize(self):
        """The numpy Normalize must agree with the torch-based Normalize."""
        meanstd = dict(mean=[1, 2, 3], std=[1, 1, 1])
        normalize = transforms.Normalize(**meanstd)
        cvnormalize = cvtransforms.Normalize(**meanstd)

        w, h = 68, 54
        img = np.random.randn(h, w, 3)
        # Fill channel i with the constant i + 1 so that the normalized
        # result is exactly zero everywhere.
        for i in range(3):
            img[:, :, i] = i + 1
        # The torch transform expects channels-first, hence the permutes.
        chw = torch.from_numpy(img).clone().permute(2, 0, 1)
        res_th = normalize(chw).permute(1, 2, 0).numpy()
        res_np = cvnormalize(img)
        self.assertEqual(np.abs(res_np - res_th).sum(), 0)

    def testFlip(self):
        """A horizontal flip swaps the left and right halves of the image."""
        w, h = 12, 10
        img = np.random.randn(h, w, 1)
        img[:, :6, :] = 0
        img[:, 6:, :] = 1

        # The transform returns the very same object when it decides not to
        # flip, so retry until a new array comes back.
        flip = img
        while flip is img:
            flip = cvtransforms.RandomHorizontalFlip()(img)
        self.assertEqual(flip.shape, img.shape)
        self.assertEqual(flip[:, :6, :].mean(), 1)
        self.assertEqual(flip[:, 6:, :].mean(), 0)

    def testPadding(self):
        """Reflect padding extends each half by the padding width."""
        w, h = 12, 10
        img = np.random.randn(h, w, 1)
        img[:, :6, :] = 0
        img[:, 6:, :] = 1

        padded = cvtransforms.Pad(2, cv2.BORDER_REFLECT)(img)
        self.assertEqual(padded.shape, (h + 4, w + 4, 1))
        self.assertEqual(padded[:, :8, :].mean(), 0)
        self.assertEqual(padded[:, 8:, :].mean(), 1)

    def testRandomSizedCrop(self):
        """The output is always a (size, size) square, whatever crop is drawn."""
        size = 16
        w, h = 68, 54
        img = np.random.randn(h, w, 3)
        res = cvtransforms.RandomSizedCrop(size)(img)
        self.assertEqual(res.shape, (size, size, 3))


if __name__ == '__main__':
    unittest.main()
# torchvision/cvtransforms.py (new file)
"""OpenCV-based transforms.

Operate on np.ndarrays only, no PIL or torch dependency.
Images are indexed as (height, width[, channels]).
"""
from __future__ import division
import math
import random
import numpy as np
import numbers
import cv2


def _keep_channel_axis(result, reference):
    """Re-append the trailing channel axis OpenCV drops for 1-channel images.

    `result` is returned with a new last axis only when `reference` was 3-D
    and `result` came back 2-D; otherwise it is returned untouched.
    """
    if np.ndim(reference) == 3 and np.ndim(result) == 2:
        return result[:, :, np.newaxis]
    return result


class Normalize(object):
    """Given mean: (R, G, B) and std: (R, G, B),
    will normalize each channel of the np.ndarray, i.e.
    channel = (channel - mean) / std

    mean and std are broadcast by numpy against the input, so per-channel
    sequences line up with the last (channel) axis of the array.
    """

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        return (tensor - self.mean) / self.std


class Scale(object):
    """Rescales the input np.ndarray to the given 'size'.
    'size' will be the size of the smaller edge.
    For example, if height > width, then image will be
    rescaled to (size * height / width, size)
    size: size of the smaller edge
    interpolation: Default: cv2.INTER_CUBIC

    The input is returned unchanged when its smaller edge already equals
    'size'. The number of dimensions of the input is preserved.
    """

    def __init__(self, size, interpolation=cv2.INTER_CUBIC):
        self.size = size
        self.interpolation = interpolation

    def __call__(self, img):
        h, w = img.shape[0], img.shape[1]
        if (w <= h and w == self.size) or (h <= w and h == self.size):
            return img
        if w < h:
            ow = self.size
            oh = int(float(self.size) * h / w)
        else:
            oh = self.size
            ow = int(float(self.size) * w / h)
        # cv2 takes the destination size as (width, height).
        res = cv2.resize(img, dsize=(ow, oh),
                         interpolation=self.interpolation)
        # Keep (h, w, 1) inputs 3-D, consistent with the early-return path
        # above and with Pad / RandomHorizontalFlip.
        return _keep_channel_axis(res, img)


class CenterCrop(object):
    """Crops the given np.ndarray at the center to have a region of
    the given size. size can be a tuple (target_height, target_width)
    or an integer, in which case the target will be of a square shape
    (size, size)

    Works on both (h, w) and (h, w, c) arrays. If the image is smaller than
    the target along an axis, the whole extent of that axis is returned.
    """

    def __init__(self, size):
        if isinstance(size, numbers.Number):
            self.size = (int(size), int(size))
        else:
            self.size = size

    def __call__(self, img):
        h, w = img.shape[0], img.shape[1]
        th, tw = self.size
        # Clamp at 0: a negative start would be read by numpy as an offset
        # from the end and silently return an off-centre fragment.
        x1 = max(0, int(round((w - tw) / 2.)))
        y1 = max(0, int(round((h - th) / 2.)))
        # Only the two spatial axes are sliced so 2-D inputs work as well.
        return img[y1:y1 + th, x1:x1 + tw]


class Pad(object):
    """Pads the given np.ndarray on all sides by 'padding' pixels.

    padding: number of pixels added on each of the four sides
    borderType: OpenCV border mode. Default: cv2.BORDER_CONSTANT
    borderValue: fill value used by cv2.BORDER_CONSTANT. Default: 0

    The result always carries a trailing channel axis, except when
    padding is 0, in which case the input is returned unchanged.
    """

    def __init__(self, padding, borderType=cv2.BORDER_CONSTANT, borderValue=0):
        assert isinstance(padding, numbers.Number)
        self.padding = padding
        self.borderType = borderType
        self.borderValue = borderValue

    def __call__(self, img):
        if self.padding == 0:
            return img
        p = self.padding
        res = cv2.copyMakeBorder(img, p, p, p, p,
                                 borderType=self.borderType,
                                 value=self.borderValue)
        # A 2-D result gets its channel axis (re-)appended.
        return res[:, :, np.newaxis] if np.ndim(res) == 2 else res


class RandomCrop(object):
    """Crops the given np.ndarray at a random location to have a region of
    the given size. size can be a tuple (target_height, target_width)
    or an integer, in which case the target will be of a square shape
    (size, size)

    Works on both (h, w) and (h, w, c) arrays. Raises ValueError when the
    requested crop is larger than the image.
    """

    def __init__(self, size):
        if isinstance(size, numbers.Number):
            self.size = (int(size), int(size))
        else:
            self.size = size

    def __call__(self, img):
        h, w = img.shape[0], img.shape[1]
        th, tw = self.size
        if w == tw and h == th:
            return img
        if tw > w or th > h:
            # random.randint would raise ValueError here as well, but with
            # a message that says nothing about the image.
            raise ValueError(
                'crop size ({}, {}) is larger than the image ({}, {})'
                .format(th, tw, h, w))

        x1 = random.randint(0, w - tw)
        y1 = random.randint(0, h - th)
        return img[y1:y1 + th, x1:x1 + tw]


class RandomHorizontalFlip(object):
    """Randomly horizontally flips the given np.ndarray with a probability of 0.5

    When no flip happens the input object itself is returned.
    """

    def __call__(self, img):
        if random.random() < 0.5:
            # reshape restores the original shape of the flipped array
            return cv2.flip(img, 1).reshape(img.shape)
        return img


class RandomSizedCrop(object):
    """Random crop the given np.ndarray to a random size of (0.08 to 1.0) of the original size
    and a random aspect ratio of 3/4 to 4/3 of the original aspect ratio
    This is popularly used to train the Inception networks
    size: edge length of the square (size, size) output
    interpolation: Default: cv2.INTER_CUBIC

    Up to 10 random crops are tried; if none fits inside the image the
    result falls back to Scale(size) followed by CenterCrop(size).
    """

    def __init__(self, size, interpolation=cv2.INTER_CUBIC):
        self.size = size
        self.interpolation = interpolation

    def __call__(self, img):
        height, width = img.shape[0], img.shape[1]
        area = height * width
        for _ in range(10):
            target_area = random.uniform(0.08, 1.0) * area
            aspect_ratio = random.uniform(3. / 4., 4. / 3.)

            w = int(round(math.sqrt(target_area * aspect_ratio)))
            h = int(round(math.sqrt(target_area / aspect_ratio)))

            if random.random() < 0.5:
                w, h = h, w

            # On tiny images a side can round down to 0; an empty patch
            # cannot be resized, so such a draw is treated as a miss.
            if 0 < w <= width and 0 < h <= height:
                x1 = random.randint(0, width - w)
                y1 = random.randint(0, height - h)

                patch = img[y1:y1 + h, x1:x1 + w]
                assert patch.shape[0] == h and patch.shape[1] == w

                res = cv2.resize(patch, (self.size, self.size),
                                 interpolation=self.interpolation)
                return _keep_channel_axis(res, img)

        # Fallback
        scale = Scale(self.size, interpolation=self.interpolation)
        crop = CenterCrop(self.size)
        return crop(scale(img))