OpenCV transforms with tests #34

Closed · wants to merge 2 commits
64 changes: 64 additions & 0 deletions test/test_cvtransforms.py
@@ -0,0 +1,64 @@
from torchvision import cvtransforms
from torchvision import transforms
import unittest
import numpy as np
import torch
import cv2

class TestOpenCVTransforms(unittest.TestCase):
    def testScale(self):
        size = 43
        w, h = 68, 54
        img = np.random.randn(h, w, 3)
        tr = cvtransforms.Scale(size)
        res = tr(img)
        # The smaller edge (h) is scaled to `size`; with these dimensions the
        # scaled width int(43 * 68 / 54) happens to equal the original h (54).
        self.assertEqual(res.shape[0], size)
        self.assertEqual(res.shape[1], h)

    def testCenterCrop(self):
        size = 43
        w, h = 68, 54
        img = np.random.randn(h, w, 3)
        tr = cvtransforms.CenterCrop(size)
        res = tr(img)
        self.assertEqual(res.shape[0], size)
        self.assertEqual(res.shape[1], size)

    def testNormalize(self):
        meanstd = dict(mean=[1, 2, 3], std=[1, 1, 1])
        normalize = transforms.Normalize(**meanstd)
        cvnormalize = cvtransforms.Normalize(**meanstd)

        w, h = 68, 54
        img = np.random.randn(h, w, 3)
        for i in range(3):
            img[:, :, i] = i + 1
        # torchvision's Normalize expects CHW tensors; permute back to HWC to compare.
        res_th = normalize(torch.from_numpy(img).clone().permute(2, 0, 1)).permute(1, 2, 0).numpy()
        res_np = cvnormalize(img)
        self.assertEqual(np.abs(res_np - res_th).sum(), 0)

    def testFlip(self):
        w, h = 12, 10
        img = np.random.randn(h, w, 1)
        img[:, :6, :] = 0
        img[:, 6:, :] = 1

        # RandomHorizontalFlip returns the input object unchanged when it does not
        # flip, so loop until a flipped copy (a different object) is returned.
        flip = img
        while id(flip) == id(img):
            flip = cvtransforms.RandomHorizontalFlip()(img)
        self.assertEqual(flip[:, :6, :].mean(), 1)
        self.assertEqual(flip[:, 6:, :].mean(), 0)

    def testPadding(self):
        w, h = 12, 10
        img = np.random.randn(h, w, 1)
        img[:, :6, :] = 0
        img[:, 6:, :] = 1

        # With BORDER_REFLECT the padded columns mirror the nearest originals,
        # so the zero half and the one half each grow to 8 columns.
        padded = cvtransforms.Pad(2, cv2.BORDER_REFLECT)(img)
        self.assertEqual(padded[:, :8, :].mean(), 0)
        self.assertEqual(padded[:, 8:, :].mean(), 1)


if __name__ == '__main__':
    unittest.main()
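
RandomCrop and RandomSizedCrop from the new module are not exercised by the tests above. As an illustration only (not part of this diff), a test method in the same style could verify the output shape of RandomCrop; it assumes the same imports and would live inside TestOpenCVTransforms:

    def testRandomCrop(self):
        size = 43
        w, h = 68, 54
        img = np.random.randn(h, w, 3)
        res = cvtransforms.RandomCrop(size)(img)
        # The crop should be square with the requested side length.
        self.assertEqual(res.shape[0], size)
        self.assertEqual(res.shape[1], size)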
160 changes: 160 additions & 0 deletions torchvision/cvtransforms.py
@@ -0,0 +1,160 @@
""" OpenCV-based transforms
Operate on np.ndarrays only, no PIL or torch dependency
"""
from __future__ import division
import math
import random
import numpy as np
import numbers
import cv2


class Normalize(object):
    """Given mean: (R, G, B) and std: (R, G, B),
    will normalize each channel of the np.ndarray, i.e.
    channel = (channel - mean) / std
    """

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        return (tensor - self.mean) / self.std


class Scale(object):
    """Rescales the input np.ndarray to the given 'size'.
    'size' will be the size of the smaller edge.
    For example, if height > width, then the image will be
    rescaled to (size * height / width, size).
    size: size of the smaller edge
    interpolation: Default: cv2.INTER_CUBIC
    """
    def __init__(self, size, interpolation=cv2.INTER_CUBIC):
        self.size = size
        self.interpolation = interpolation

    def __call__(self, img):
        w, h = img.shape[1], img.shape[0]
        if (w <= h and w == self.size) or (h <= w and h == self.size):
            return img
        if w < h:
            ow = self.size
            oh = int(float(self.size) * h / w)
        else:
            oh = self.size
            ow = int(float(self.size) * w / h)
        return cv2.resize(img, dsize=(ow, oh),
                          interpolation=self.interpolation)


class CenterCrop(object):
    """Crops the given np.ndarray at the center to have a region of
    the given size. size can be a tuple (target_height, target_width)
    or an integer, in which case the target will be of a square shape
    (size, size)
    """
    def __init__(self, size):
        if isinstance(size, numbers.Number):
            self.size = (int(size), int(size))
        else:
            self.size = size

    def __call__(self, img):
        w, h = img.shape[1], img.shape[0]
        th, tw = self.size
        x1 = int(round((w - tw) / 2.))
        y1 = int(round((h - th) / 2.))
        return img[y1:y1 + th, x1:x1 + tw, :]


class Pad(object):
    """Pads the given np.ndarray on all sides with `padding` pixels,
    using the given OpenCV border type and border value."""

    def __init__(self, padding, borderType=cv2.BORDER_CONSTANT, borderValue=0):
        assert isinstance(padding, numbers.Number)
        self.padding = padding
        self.borderType = borderType
        self.borderValue = borderValue

    def __call__(self, img):
        if self.padding == 0:
            return img
        p = self.padding
        res = cv2.copyMakeBorder(img, p, p, p, p,
                                 borderType=self.borderType,
                                 value=self.borderValue)
        # copyMakeBorder drops the channel axis for single-channel images; restore it.
        return res[:, :, np.newaxis] if np.ndim(res) == 2 else res


class RandomCrop(object):
    """Crops the given np.ndarray at a random location to have a region of
    the given size. size can be a tuple (target_height, target_width)
    or an integer, in which case the target will be of a square shape
    (size, size)
    """
    def __init__(self, size):
        if isinstance(size, numbers.Number):
            self.size = (int(size), int(size))
        else:
            self.size = size

    def __call__(self, img):
        w, h = img.shape[1], img.shape[0]
        th, tw = self.size
        if w == tw and h == th:
            return img

        x1 = random.randint(0, w - tw)
        y1 = random.randint(0, h - th)
        return img[y1:y1 + th, x1:x1 + tw, :]


class RandomHorizontalFlip(object):
    """Randomly horizontally flips the given np.ndarray with a probability of 0.5
    """
    def __call__(self, img):
        if random.random() < 0.5:
            # cv2.flip drops the channel axis for single-channel images;
            # reshape restores the original shape.
            return cv2.flip(img, 1).reshape(img.shape)
        return img


class RandomSizedCrop(object):
    """Randomly crops the given np.ndarray to a random size of (0.08 to 1.0) of the
    original size and a random aspect ratio of 3/4 to 4/3 of the original aspect ratio.
    This is popularly used to train the Inception networks.
    size: size of the square output
    interpolation: Default: cv2.INTER_CUBIC
    """
    def __init__(self, size, interpolation=cv2.INTER_CUBIC):
        self.size = size
        self.interpolation = interpolation

    def __call__(self, img):
        for attempt in range(10):
            area = img.shape[0] * img.shape[1]
            target_area = random.uniform(0.08, 1.0) * area
            aspect_ratio = random.uniform(3. / 4., 4. / 3.)

            w = int(round(math.sqrt(target_area * aspect_ratio)))
            h = int(round(math.sqrt(target_area / aspect_ratio)))

            if random.random() < 0.5:
                w, h = h, w

            if w <= img.shape[1] and h <= img.shape[0]:
                x1 = random.randint(0, img.shape[1] - w)
                y1 = random.randint(0, img.shape[0] - h)

                img = img[y1:y1 + h, x1:x1 + w, :]
                assert img.shape[0] == h and img.shape[1] == w

                return cv2.resize(img, (self.size, self.size),
                                  interpolation=self.interpolation)

        # Fallback: scale the smaller edge and take a center crop.
        scale = Scale(self.size, interpolation=self.interpolation)
        crop = CenterCrop(self.size)
        return crop(scale(img))
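
For context, a minimal usage sketch (not part of this diff) of how these transforms might be chained on an OpenCV image. It assumes torchvision.transforms.Compose, which simply applies each callable in turn, and a hypothetical image file 'example.jpg'; the mean/std values are placeholders:

import cv2
import numpy as np
from torchvision import cvtransforms, transforms

# cv2.imread returns an HWC, BGR, uint8 ndarray; convert to float for Normalize.
img = cv2.imread('example.jpg').astype(np.float32) / 255.0

pipeline = transforms.Compose([
    cvtransforms.Scale(256),        # smaller edge -> 256
    cvtransforms.CenterCrop(224),   # central 224 x 224 region
    cvtransforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])
out = pipeline(img)   # still an np.ndarray in HWC layout
print(out.shape)      # (224, 224, 3)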