
Commit 4ecf860

Merge branch 'master' of https://github.com/pytorch/vision into inception
2 parents f640996 + 55b5a6b commit 4ecf860

10 files changed: +111 additions, -45 deletions


docs/source/conf.py

Lines changed: 1 addition & 1 deletion
@@ -208,7 +208,7 @@ def patched_make_field(self, types, domain, items, **kw):
     # `kw` catches `env=None` needed for newer sphinx while maintaining
     # backwards compatibility when passed along further down!

-    # type: (List, unicode, Tuple) -> nodes.field
+    # type: (list, unicode, tuple) -> nodes.field
     def handle_item(fieldarg, content):
         par = nodes.paragraph()
         par += addnodes.literal_strong('', fieldarg)  # Patch: this line added

docs/source/datasets.rst

Lines changed: 5 additions & 0 deletions
@@ -45,6 +45,11 @@ EMNIST

 .. autoclass:: EMNIST

+FakeData
+~~~~~~~~
+
+.. autoclass:: FakeData
+
 COCO
 ~~~~
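Since FakeData is now listed in the docs, here is a minimal usage sketch (not part of this diff; the constructor arguments size, image_size and num_classes are assumed from the class defaults):

    import torchvision.transforms as transforms
    from torchvision.datasets import FakeData

    # Synthetic dataset: 100 random 3x224x224 images with labels in [0, 10).
    dataset = FakeData(size=100, image_size=(3, 224, 224), num_classes=10,
                       transform=transforms.ToTensor())
    img, target = dataset[0]
    print(img.shape, target)  # torch.Size([3, 224, 224]) and a random class label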

test/test_transforms.py

Lines changed: 24 additions & 4 deletions
@@ -1,3 +1,4 @@
+from __future__ import division
 import torch
 import torchvision.transforms as transforms
 import torchvision.transforms.functional as F
@@ -130,6 +131,25 @@ def test_ten_crop(self):
         assert len(results) == 10
         assert expected_output == results

+    def test_randomresized_params(self):
+        height = random.randint(24, 32) * 2
+        width = random.randint(24, 32) * 2
+        img = torch.ones(3, height, width)
+        to_pil_image = transforms.ToPILImage()
+        img = to_pil_image(img)
+        size = 100
+        epsilon = 0.05
+        for i in range(10):
+            scale_min = round(random.random(), 2)
+            scale_range = (scale_min, scale_min + round(random.random(), 2))
+            aspect_min = round(random.random(), 2)
+            aspect_ratio_range = (aspect_min, aspect_min + round(random.random(), 2))
+            randresizecrop = transforms.RandomResizedCrop(size, scale_range, aspect_ratio_range)
+            _, _, h, w = randresizecrop.get_params(img, scale_range, aspect_ratio_range)
+            aspect_ratio_obtained = w / h
+            assert (min(aspect_ratio_range) - epsilon <= aspect_ratio_obtained <= max(aspect_ratio_range) + epsilon or
+                    aspect_ratio_obtained == 1.0)
+
     def test_resize(self):
         height = random.randint(24, 32) * 2
         width = random.randint(24, 32) * 2
@@ -990,10 +1010,10 @@ def test_rotate(self):
         assert np.all(np.array(result_a) == np.array(result_b))

     def test_affine(self):
-        input_img = np.zeros((200, 200, 3), dtype=np.uint8)
+        input_img = np.zeros((40, 40, 3), dtype=np.uint8)
         pts = []
-        cnt = [100, 100]
-        for pt in [(80, 80), (100, 80), (100, 100)]:
+        cnt = [20, 20]
+        for pt in [(16, 16), (20, 16), (20, 20)]:
             for i in range(-5, 5):
                 for j in range(-5, 5):
                     input_img[pt[0] + i, pt[1] + j, :] = [255, 155, 55]
@@ -1028,7 +1048,7 @@ def _test_transformation(a, t, s, sh):
                                             translate=t, scale=s, shear=sh))
             assert np.sum(np.abs(true_matrix - result_matrix)) < 1e-10
             # 2) Perform inverse mapping:
-            true_result = np.zeros((200, 200, 3), dtype=np.uint8)
+            true_result = np.zeros((40, 40, 3), dtype=np.uint8)
             inv_true_matrix = np.linalg.inv(true_matrix)
             for y in range(true_result.shape[0]):
                 for x in range(true_result.shape[1]):

torchvision/datasets/mnist.py

Lines changed: 24 additions & 2 deletions
@@ -1,4 +1,5 @@
 from __future__ import print_function
+import warnings
 import torch.utils.data as data
 from PIL import Image
 import os
@@ -37,6 +38,26 @@ class MNIST(data.Dataset):
     classes = ['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four',
                '5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine']

+    @property
+    def train_labels(self):
+        warnings.warn("train_labels has been renamed targets")
+        return self.targets
+
+    @property
+    def test_labels(self):
+        warnings.warn("test_labels has been renamed targets")
+        return self.targets
+
+    @property
+    def train_data(self):
+        warnings.warn("train_data has been renamed data")
+        return self.data
+
+    @property
+    def test_data(self):
+        warnings.warn("test_data has been renamed data")
+        return self.data
+
     def __init__(self, root, train=True, transform=None, target_transform=None, download=False):
         self.root = os.path.expanduser(root)
         self.transform = transform
@@ -205,7 +226,7 @@ class KMNIST(MNIST):


 class EMNIST(MNIST):
-    """`EMNIST <https://www.nist.gov/itl/iad/image-group/emnist-dataset/>`_ Dataset.
+    """`EMNIST <https://www.westernsydney.edu.au/bens/home/reproducible_research/emnist>`_ Dataset.

     Args:
         root (string): Root directory of dataset where ``processed/training.pt``
@@ -223,7 +244,8 @@ class EMNIST(MNIST):
         target_transform (callable, optional): A function/transform that takes in the
             target and transforms it.
     """
-    url = 'http://www.itl.nist.gov/iaui/vip/cs_links/EMNIST/gzip.zip'
+    # Updated URL from https://www.westernsydney.edu.au/bens/home/reproducible_research/emnist
+    url = 'https://cloudstor.aarnet.edu.au/plus/index.php/s/54h3OuGJhFLwAlQ/download'
     splits = ('byclass', 'bymerge', 'balanced', 'letters', 'digits', 'mnist')

     def __init__(self, root, split, **kwargs):
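A quick sketch (not part of the commit) of what the new alias properties do: reading the old attribute names emits a warning and falls through to the renamed attributes. The root path below is arbitrary:

    import warnings
    from torchvision.datasets import MNIST

    dataset = MNIST(root='./data', train=True, download=True)  # './data' is an arbitrary local path

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        labels = dataset.train_labels    # deprecated alias
    assert labels is dataset.targets     # same object as under the new name
    print(caught[0].message)             # train_labels has been renamed targets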

torchvision/models/alexnet.py

Lines changed: 2 additions & 0 deletions
@@ -29,6 +29,7 @@ def __init__(self, num_classes=1000):
             nn.ReLU(inplace=True),
             nn.MaxPool2d(kernel_size=3, stride=2),
         )
+        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
         self.classifier = nn.Sequential(
             nn.Dropout(),
             nn.Linear(256 * 6 * 6, 4096),
@@ -41,6 +42,7 @@ def __init__(self, num_classes=1000):

     def forward(self, x):
         x = self.features(x)
+        x = self.avgpool(x)
         x = x.view(x.size(0), 256 * 6 * 6)
         x = self.classifier(x)
         return x
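For context, a small sketch (random weights, arbitrarily chosen input size) of what the inserted AdaptiveAvgPool2d buys: the 256 * 6 * 6 flatten no longer requires the feature map to land at exactly 6 x 6, so inputs larger than 224 x 224 pass through:

    import torch
    from torchvision.models import alexnet

    model = alexnet(pretrained=False)
    model.eval()
    with torch.no_grad():
        out = model(torch.randn(1, 3, 256, 256))  # features yield 7x7 here; avgpool maps it to 6x6
    print(out.shape)  # torch.Size([1, 1000])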

torchvision/models/inception.py

Lines changed: 39 additions & 31 deletions
@@ -17,6 +17,10 @@ def inception_v3(pretrained=False, **kwargs):
     r"""Inception v3 model architecture from
     `"Rethinking the Inception Architecture for Computer Vision" <http://arxiv.org/abs/1512.00567>`_.

+    .. note::
+        **Important**: In contrast to the other models the inception_v3 expects tensors with a size of
+        N x 3 x 299 x 299, so ensure your images are sized accordingly.
+
     Args:
         pretrained (bool): If True, returns a model pre-trained on ImageNet
     """
@@ -74,54 +78,55 @@ def forward(self, x):
         x_ch1 = torch.unsqueeze(x[:, 1], 1) * (0.224 / 0.5) + (0.456 - 0.5) / 0.5
         x_ch2 = torch.unsqueeze(x[:, 2], 1) * (0.225 / 0.5) + (0.406 - 0.5) / 0.5
         x = torch.cat((x_ch0, x_ch1, x_ch2), 1)
-        # 299 x 299 x 3
+        # N x 3 x 299 x 299
         x = self.Conv2d_1a_3x3(x)
-        # 149 x 149 x 32
+        # N x 32 x 149 x 149
         x = self.Conv2d_2a_3x3(x)
-        # 147 x 147 x 32
+        # N x 32 x 147 x 147
         x = self.Conv2d_2b_3x3(x)
-        # 147 x 147 x 64
+        # N x 64 x 147 x 147
         x = F.max_pool2d(x, kernel_size=3, stride=2)
-        # 73 x 73 x 64
+        # N x 64 x 73 x 73
         x = self.Conv2d_3b_1x1(x)
-        # 73 x 73 x 80
+        # N x 80 x 73 x 73
         x = self.Conv2d_4a_3x3(x)
-        # 71 x 71 x 192
+        # N x 192 x 71 x 71
         x = F.max_pool2d(x, kernel_size=3, stride=2)
-        # 35 x 35 x 192
+        # N x 192 x 35 x 35
         x = self.Mixed_5b(x)
-        # 35 x 35 x 256
+        # N x 256 x 35 x 35
         x = self.Mixed_5c(x)
-        # 35 x 35 x 288
+        # N x 288 x 35 x 35
         x = self.Mixed_5d(x)
-        # 35 x 35 x 288
+        # N x 288 x 35 x 35
         x = self.Mixed_6a(x)
-        # 17 x 17 x 768
+        # N x 768 x 17 x 17
         x = self.Mixed_6b(x)
-        # 17 x 17 x 768
+        # N x 768 x 17 x 17
         x = self.Mixed_6c(x)
-        # 17 x 17 x 768
+        # N x 768 x 17 x 17
         x = self.Mixed_6d(x)
-        # 17 x 17 x 768
+        # N x 768 x 17 x 17
         x = self.Mixed_6e(x)
-        # 17 x 17 x 768
+        # N x 768 x 17 x 17
         if self.training and self.aux_logits:
             aux = self.AuxLogits(x)
-        # 17 x 17 x 768
+        # N x 768 x 17 x 17
         x = self.Mixed_7a(x)
-        # 8 x 8 x 1280
+        # N x 1280 x 8 x 8
         x = self.Mixed_7b(x)
-        # 8 x 8 x 2048
+        # N x 2048 x 8 x 8
         x = self.Mixed_7c(x)
-        # 8 x 8 x 2048
-        x = F.avg_pool2d(x, kernel_size=8)
-        # 1 x 1 x 2048
+        # N x 2048 x 8 x 8
+        # Adaptive average pooling
+        x = F.adaptive_avg_pool2d(x, (1, 1))
+        # N x 2048 x 1 x 1
         x = F.dropout(x, training=self.training)
-        # 1 x 1 x 2048
+        # N x 2048 x 1 x 1
         x = x.view(x.size(0), -1)
-        # 2048
+        # N x 2048
         x = self.fc(x)
-        # 1000 (num_classes)
+        # N x 1000 (num_classes)
         if self.training and self.aux_logits:
             return x, aux
         return x
@@ -300,17 +305,20 @@ def __init__(self, in_channels, num_classes):
         self.fc.stddev = 0.001

     def forward(self, x):
-        # 17 x 17 x 768
+        # N x 768 x 17 x 17
         x = F.avg_pool2d(x, kernel_size=5, stride=3)
-        # 5 x 5 x 768
+        # N x 768 x 5 x 5
         x = self.conv0(x)
-        # 5 x 5 x 128
+        # N x 128 x 5 x 5
         x = self.conv1(x)
-        # 1 x 1 x 768
+        # N x 768 x 1 x 1
+        # Adaptive average pooling
+        x = F.adaptive_avg_pool2d(x, (1, 1))
+        # N x 768 x 1 x 1
         x = x.view(x.size(0), -1)
-        # 768
+        # N x 768
         x = self.fc(x)
-        # 1000
+        # N x 1000
         return x
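To make the reordered shape comments concrete, a minimal usage sketch (random weights; aux_logits left at its default of True) showing the N-first convention from the new note and the extra auxiliary output in training mode:

    import torch
    from torchvision.models import inception_v3

    model = inception_v3(pretrained=False)   # aux_logits=True by default
    x = torch.randn(2, 3, 299, 299)          # N x 3 x 299 x 299, as the new note requires

    model.train()
    out, aux = model(x)                      # training mode returns (N x 1000, N x 1000)

    model.eval()
    with torch.no_grad():
        out = model(x)                       # eval mode returns only the main head
    print(out.shape)                         # torch.Size([2, 1000])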

torchvision/models/vgg.py

Lines changed: 2 additions & 0 deletions
@@ -25,6 +25,7 @@ class VGG(nn.Module):
     def __init__(self, features, num_classes=1000, init_weights=True):
         super(VGG, self).__init__()
         self.features = features
+        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
         self.classifier = nn.Sequential(
             nn.Linear(512 * 7 * 7, 4096),
             nn.ReLU(True),
@@ -39,6 +40,7 @@ def __init__(self, features, num_classes=1000, init_weights=True):

     def forward(self, x):
         x = self.features(x)
+        x = self.avgpool(x)
         x = x.view(x.size(0), -1)
         x = self.classifier(x)
         return x

torchvision/transforms/functional.py

Lines changed: 3 additions & 3 deletions
@@ -117,7 +117,7 @@ def to_pil_image(pic, mode=None):

     elif pic.ndimension() == 2:
         # if 2D image, add channel dimension (CHW)
-        pic.unsqueeze_(0)
+        pic = pic.unsqueeze(0)

     elif isinstance(pic, np.ndarray):
         if pic.ndim not in {2, 3}:
@@ -376,8 +376,8 @@ def resized_crop(img, i, j, h, w, size, interpolation=Image.BILINEAR):

     Args:
         img (PIL Image): Image to be cropped.
-        i: Upper pixel coordinate.
-        j: Left pixel coordinate.
+        i: i in (i,j) i.e coordinates of the upper left corner
+        j: j in (i,j) i.e coordinates of the upper left corner
         h: Height of the cropped image.
         w: Width of the cropped image.
         size (sequence or int): Desired output size. Same semantics as ``resize``.
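A small sketch (hypothetical tensor) of the behavioral difference in to_pil_image: the caller's 2-D tensor is no longer modified in place when the channel dimension is added:

    import torch
    import torchvision.transforms.functional as F

    pic = torch.rand(28, 28)   # 2-D grayscale tensor, values in [0, 1]
    img = F.to_pil_image(pic)  # previously this called pic.unsqueeze_(0) and mutated pic
    assert pic.dim() == 2      # with this change the input keeps its original shape
    print(img.size)            # (28, 28)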

torchvision/transforms/transforms.py

Lines changed: 8 additions & 2 deletions
@@ -543,7 +543,13 @@ class RandomResizedCrop(object):
     """

     def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), interpolation=Image.BILINEAR):
-        self.size = (size, size)
+        if isinstance(size, tuple):
+            self.size = size
+        else:
+            self.size = (size, size)
+        if (scale[0] > scale[1]) or (ratio[0] > ratio[1]):
+            warnings.warn("range should be of kind (min, max)")
+
         self.interpolation = interpolation
         self.scale = scale
         self.ratio = ratio
@@ -570,7 +576,7 @@ def get_params(img, scale, ratio):
             w = int(round(math.sqrt(target_area * aspect_ratio)))
             h = int(round(math.sqrt(target_area / aspect_ratio)))

-            if random.random() < 0.5:
+            if random.random() < 0.5 and min(ratio) <= (h / w) <= max(ratio):
                 w, h = h, w

             if w <= img.size[0] and h <= img.size[1]:
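A short sketch (blank PIL image as a stand-in) of the two user-visible changes to RandomResizedCrop: a tuple size is now accepted as-is, and a reversed (max, min) range triggers a warning:

    import warnings
    from PIL import Image
    import torchvision.transforms as transforms

    img = Image.new('RGB', (500, 400))

    crop = transforms.RandomResizedCrop((224, 224))  # tuple sizes no longer get wrapped again as (size, size)
    print(crop(img).size)                            # (224, 224)

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        transforms.RandomResizedCrop(224, scale=(1.0, 0.08))  # reversed range
    print(caught[0].message)                         # range should be of kind (min, max)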

torchvision/utils.py

Lines changed: 3 additions & 2 deletions
@@ -74,7 +74,7 @@ def norm_range(t, range):
     xmaps = min(nrow, nmaps)
     ymaps = int(math.ceil(float(nmaps) / xmaps))
     height, width = int(tensor.size(2) + padding), int(tensor.size(3) + padding)
-    grid = tensor.new(3, height * ymaps + padding, width * xmaps + padding).fill_(pad_value)
+    grid = tensor.new_full((3, height * ymaps + padding, width * xmaps + padding), pad_value)
     k = 0
     for y in irange(ymaps):
         for x in irange(xmaps):
@@ -99,6 +99,7 @@ def save_image(tensor, filename, nrow=8, padding=2,
     from PIL import Image
     grid = make_grid(tensor, nrow=nrow, padding=padding, pad_value=pad_value,
                      normalize=normalize, range=range, scale_each=scale_each)
-    ndarr = grid.mul(255).clamp(0, 255).byte().permute(1, 2, 0).cpu().numpy()
+    # Add 0.5 after unnormalizing to [0, 255] to round to nearest integer
+    ndarr = grid.mul_(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).to('cpu', torch.uint8).numpy()
     im = Image.fromarray(ndarr)
     im.save(filename)
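For reference, a minimal sketch (arbitrary output path) of the affected call; with this change the grid border is filled via new_full, and pixel values are rounded rather than truncated when cast to uint8:

    import torch
    from torchvision.utils import save_image

    batch = torch.rand(16, 3, 32, 32)  # values in [0, 1]
    save_image(batch, 'grid.png', nrow=4, padding=2, pad_value=1.0)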
