From fa7cff744b5a302a0f2beb5aabcff23711a34698 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Wed, 12 Aug 2020 11:29:44 -0700 Subject: [PATCH 1/6] Fix typo (#1118) In PyTorch tutorial, `torch` should be installed rather than `torchaudio` --- recipes_source/recipes/what_is_state_dict.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes_source/recipes/what_is_state_dict.py b/recipes_source/recipes/what_is_state_dict.py index 8e718e9071e..5e7f259fd7b 100644 --- a/recipes_source/recipes/what_is_state_dict.py +++ b/recipes_source/recipes/what_is_state_dict.py @@ -28,7 +28,7 @@ :: - pip install torchaudio + pip install torch """ From f056cf9de0ed24d2f31a661813b68e11b46ecaca Mon Sep 17 00:00:00 2001 From: Parth Patel Date: Fri, 21 Aug 2020 00:07:41 +0200 Subject: [PATCH 2/6] imagenet_1k and mobilenet_pretrained_float.pth are included in cell --- static_quantization_tutorial.ipynb | 41 ++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 static_quantization_tutorial.ipynb diff --git a/static_quantization_tutorial.ipynb b/static_quantization_tutorial.ipynb new file mode 100644 index 00000000000..866bd6af38c --- /dev/null +++ b/static_quantization_tutorial.ipynb @@ -0,0 +1,41 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Untitled0.ipynb", + "provenance": [], + "authorship_tag": "ABX9TyOOi/to8wgIrTQMTwi8uZZ3", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "2Q81eF0E5nPd", + "colab_type": "code", + "colab": {} + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file From 76593fe1e8f32a3a7bee01571b0156c50ba4b0dd Mon Sep 17 00:00:00 2001 From: Parth Patel Date: Fri, 21 Aug 2020 00:27:13 +0200 Subject: [PATCH 3/6] moved to right place --- .../static_quantization_tutorial.ipynb | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename static_quantization_tutorial.ipynb => advanced_source/static_quantization_tutorial.ipynb (100%) diff --git a/static_quantization_tutorial.ipynb b/advanced_source/static_quantization_tutorial.ipynb similarity index 100% rename from static_quantization_tutorial.ipynb rename to advanced_source/static_quantization_tutorial.ipynb From a8107b5f4702d881edf720dcabcd8aca5dc3ae06 Mon Sep 17 00:00:00 2001 From: Parth Patel Date: Fri, 21 Aug 2020 00:28:19 +0200 Subject: [PATCH 4/6] Revert "moved to right place" This reverts commit 76593fe1e8f32a3a7bee01571b0156c50ba4b0dd. --- ...ntization_tutorial.ipynb => static_quantization_tutorial.ipynb | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename advanced_source/static_quantization_tutorial.ipynb => static_quantization_tutorial.ipynb (100%) diff --git a/advanced_source/static_quantization_tutorial.ipynb b/static_quantization_tutorial.ipynb similarity index 100% rename from advanced_source/static_quantization_tutorial.ipynb rename to static_quantization_tutorial.ipynb From 3125d668a9e3beaeb99004ce289ffdf4019cbd9b Mon Sep 17 00:00:00 2001 From: Parth Patel Date: Fri, 21 Aug 2020 00:28:30 +0200 Subject: [PATCH 5/6] Revert "imagenet_1k and mobilenet_pretrained_float.pth are included in cell" This reverts commit f056cf9de0ed24d2f31a661813b68e11b46ecaca. 
--- static_quantization_tutorial.ipynb | 41 ------------------------------ 1 file changed, 41 deletions(-) delete mode 100644 static_quantization_tutorial.ipynb diff --git a/static_quantization_tutorial.ipynb b/static_quantization_tutorial.ipynb deleted file mode 100644 index 866bd6af38c..00000000000 --- a/static_quantization_tutorial.ipynb +++ /dev/null @@ -1,41 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "Untitled0.ipynb", - "provenance": [], - "authorship_tag": "ABX9TyOOi/to8wgIrTQMTwi8uZZ3", - "include_colab_link": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "2Q81eF0E5nPd", - "colab_type": "code", - "colab": {} - }, - "source": [ - "" - ], - "execution_count": null, - "outputs": [] - } - ] -} \ No newline at end of file From cfa12324f9bcae8f3aa0542360a9ac54fff6f38f Mon Sep 17 00:00:00 2001 From: Parth Patel Date: Sun, 23 Aug 2020 21:53:57 +0200 Subject: [PATCH 6/6] Create static_quantization_tutorial.ipynb Google Colab ready --- .../static_quantization_tutorial.ipynb | 1269 +++++++++++++++++ 1 file changed, 1269 insertions(+) create mode 100644 advanced_source/static_quantization_tutorial.ipynb diff --git a/advanced_source/static_quantization_tutorial.ipynb b/advanced_source/static_quantization_tutorial.ipynb new file mode 100644 index 00000000000..c5787ad13ec --- /dev/null +++ b/advanced_source/static_quantization_tutorial.ipynb @@ -0,0 +1,1269 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + }, + "colab": { + "name": "Copy of static_quantization_tutorial.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "code", + "metadata": { + "id": "S-IqlZHXmT9F", + "colab_type": "code", + "colab": {} + }, + "source": [ + "%matplotlib inline" + ], + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GwgB_PutmT9N", + "colab_type": "text" + }, + "source": [ + "\n", + "(beta) Static Quantization with Eager Mode in PyTorch\n", + "=========================================================\n", + "\n", + "**Author**: `Raghuraman Krishnamoorthi `_\n", + "\n", + "**Edited by**: `Parth Patel `_\n", + "\n", + "This tutorial shows how to do post-training static quantization, as well as illustrating\n", + "two more advanced techniques - per-channel quantization and quantization-aware training -\n", + "to further improve the model's accuracy. Note that quantization is currently only supported\n", + "for CPUs, so we will not be utilizing GPUs / CUDA in this tutorial.\n", + "\n", + "By the end of this tutorial, you will see how quantization in PyTorch can result in\n", + "significant decreases in model size while increasing speed. 
Furthermore, you'll see how\n",
+        "to easily apply some advanced quantization techniques shown\n",
+        "`here `_ so that your quantized models take much less\n",
+        "of an accuracy hit than they would otherwise.\n",
+        "\n",
+        "Warning: we use a lot of boilerplate code from other PyTorch repos to, for example,\n",
+        "define the ``MobileNetV2`` model architecture, define data loaders, and so on. We of course\n",
+        "encourage you to read it; but if you want to get to the quantization features, feel free\n",
+        "to skip to the \"4. Post-training static quantization\" section.\n",
+        "\n",
+        "We'll start by doing the necessary imports:\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "TVO3o5uLmT9P",
+        "colab_type": "code",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 34
+        },
+        "outputId": "8ad1c9e8-5e88-4d3f-9b29-d0c1ccbde953"
+      },
+      "source": [
+        "import numpy as np\n",
+        "import torch\n",
+        "import torch.nn as nn\n",
+        "import torchvision\n",
+        "from torch.utils.data import DataLoader\n",
+        "from torchvision import datasets\n",
+        "import torchvision.transforms as transforms\n",
+        "import os\n",
+        "import time\n",
+        "import sys\n",
+        "import torch.quantization\n",
+        "\n",
+        "# Setup warnings\n",
+        "import warnings\n",
+        "warnings.filterwarnings(\n",
+        "    action='ignore',\n",
+        "    category=DeprecationWarning,\n",
+        "    module=r'.*'\n",
+        ")\n",
+        "warnings.filterwarnings(\n",
+        "    action='default',\n",
+        "    module=r'torch.quantization'\n",
+        ")\n",
+        "\n",
+        "# Specify random seed for repeatable results\n",
+        "torch.manual_seed(191009)"
+      ],
+      "execution_count": 2,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              ""
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 2
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "9yGOQeD6mT9V",
+        "colab_type": "text"
+      },
+      "source": [
+        "1. 
Model architecture\n", + "---------------------\n", + "\n", + "We first define the MobileNetV2 model architecture, with several notable modifications\n", + "to enable quantization:\n", + "\n", + "- Replacing addition with ``nn.quantized.FloatFunctional``\n", + "- Insert ``QuantStub`` and ``DeQuantStub`` at the beginning and end of the network.\n", + "- Replace ReLU6 with ReLU\n", + "\n", + "Note: this code is taken from\n", + "`here `_.\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "EZpdZtHwmT9W", + "colab_type": "code", + "colab": {} + }, + "source": [ + "from torch.quantization import QuantStub, DeQuantStub\n", + "\n", + "def _make_divisible(v, divisor, min_value=None):\n", + " \"\"\"\n", + " This function is taken from the original tf repo.\n", + " It ensures that all layers have a channel number that is divisible by 8\n", + " It can be seen here:\n", + " https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py\n", + " :param v:\n", + " :param divisor:\n", + " :param min_value:\n", + " :return:\n", + " \"\"\"\n", + " if min_value is None:\n", + " min_value = divisor\n", + " new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)\n", + " # Make sure that round down does not go down by more than 10%.\n", + " if new_v < 0.9 * v:\n", + " new_v += divisor\n", + " return new_v\n", + "\n", + "\n", + "class ConvBNReLU(nn.Sequential):\n", + " def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):\n", + " padding = (kernel_size - 1) // 2\n", + " super(ConvBNReLU, self).__init__(\n", + " nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),\n", + " nn.BatchNorm2d(out_planes, momentum=0.1),\n", + " # Replace with ReLU\n", + " nn.ReLU(inplace=False)\n", + " )\n", + "\n", + "\n", + "class InvertedResidual(nn.Module):\n", + " def __init__(self, inp, oup, stride, expand_ratio):\n", + " super(InvertedResidual, self).__init__()\n", + " self.stride = stride\n", + " assert stride in [1, 2]\n", + "\n", + " hidden_dim = int(round(inp * expand_ratio))\n", + " self.use_res_connect = self.stride == 1 and inp == oup\n", + "\n", + " layers = []\n", + " if expand_ratio != 1:\n", + " # pw\n", + " layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))\n", + " layers.extend([\n", + " # dw\n", + " ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),\n", + " # pw-linear\n", + " nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),\n", + " nn.BatchNorm2d(oup, momentum=0.1),\n", + " ])\n", + " self.conv = nn.Sequential(*layers)\n", + " # Replace torch.add with floatfunctional\n", + " self.skip_add = nn.quantized.FloatFunctional()\n", + "\n", + " def forward(self, x):\n", + " if self.use_res_connect:\n", + " return self.skip_add.add(x, self.conv(x))\n", + " else:\n", + " return self.conv(x)\n", + "\n", + "\n", + "class MobileNetV2(nn.Module):\n", + " def __init__(self, num_classes=1000, width_mult=1.0, inverted_residual_setting=None, round_nearest=8):\n", + " \"\"\"\n", + " MobileNet V2 main class\n", + "\n", + " Args:\n", + " num_classes (int): Number of classes\n", + " width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount\n", + " inverted_residual_setting: Network structure\n", + " round_nearest (int): Round the number of channels in each layer to be a multiple of this number\n", + " Set to 1 to turn off rounding\n", + " \"\"\"\n", + " super(MobileNetV2, self).__init__()\n", + " block = InvertedResidual\n", + " input_channel = 32\n", 
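+        "        # Width of the final 1x1 conv before the classifier; like input_channel above, it is scaled by width_mult and rounded via _make_divisible below\n",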
+ " last_channel = 1280\n", + "\n", + " if inverted_residual_setting is None:\n", + " inverted_residual_setting = [\n", + " # t, c, n, s\n", + " [1, 16, 1, 1],\n", + " [6, 24, 2, 2],\n", + " [6, 32, 3, 2],\n", + " [6, 64, 4, 2],\n", + " [6, 96, 3, 1],\n", + " [6, 160, 3, 2],\n", + " [6, 320, 1, 1],\n", + " ]\n", + "\n", + " # only check the first element, assuming user knows t,c,n,s are required\n", + " if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:\n", + " raise ValueError(\"inverted_residual_setting should be non-empty \"\n", + " \"or a 4-element list, got {}\".format(inverted_residual_setting))\n", + "\n", + " # building first layer\n", + " input_channel = _make_divisible(input_channel * width_mult, round_nearest)\n", + " self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)\n", + " features = [ConvBNReLU(3, input_channel, stride=2)]\n", + " # building inverted residual blocks\n", + " for t, c, n, s in inverted_residual_setting:\n", + " output_channel = _make_divisible(c * width_mult, round_nearest)\n", + " for i in range(n):\n", + " stride = s if i == 0 else 1\n", + " features.append(block(input_channel, output_channel, stride, expand_ratio=t))\n", + " input_channel = output_channel\n", + " # building last several layers\n", + " features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1))\n", + " # make it nn.Sequential\n", + " self.features = nn.Sequential(*features)\n", + " self.quant = QuantStub()\n", + " self.dequant = DeQuantStub()\n", + " # building classifier\n", + " self.classifier = nn.Sequential(\n", + " nn.Dropout(0.2),\n", + " nn.Linear(self.last_channel, num_classes),\n", + " )\n", + "\n", + " # weight initialization\n", + " for m in self.modules():\n", + " if isinstance(m, nn.Conv2d):\n", + " nn.init.kaiming_normal_(m.weight, mode='fan_out')\n", + " if m.bias is not None:\n", + " nn.init.zeros_(m.bias)\n", + " elif isinstance(m, nn.BatchNorm2d):\n", + " nn.init.ones_(m.weight)\n", + " nn.init.zeros_(m.bias)\n", + " elif isinstance(m, nn.Linear):\n", + " nn.init.normal_(m.weight, 0, 0.01)\n", + " nn.init.zeros_(m.bias)\n", + "\n", + " def forward(self, x):\n", + "\n", + " x = self.quant(x)\n", + "\n", + " x = self.features(x)\n", + " x = x.mean([2, 3])\n", + " x = self.classifier(x)\n", + " x = self.dequant(x)\n", + " return x\n", + "\n", + " # Fuse Conv+BN and Conv+BN+Relu modules prior to quantization\n", + " # This operation does not change the numerics\n", + " def fuse_model(self):\n", + " for m in self.modules():\n", + " if type(m) == ConvBNReLU:\n", + " torch.quantization.fuse_modules(m, ['0', '1', '2'], inplace=True)\n", + " if type(m) == InvertedResidual:\n", + " for idx in range(len(m.conv)):\n", + " if type(m.conv[idx]) == nn.Conv2d:\n", + " torch.quantization.fuse_modules(m.conv, [str(idx), str(idx + 1)], inplace=True)" + ], + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NQpHqXh0mT9b", + "colab_type": "text" + }, + "source": [ + "2. Helper functions\n", + "-------------------\n", + "\n", + "We next define several helper functions to help with model evaluation. 
These mostly come from\n", + "`here `_.\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "vL1GtBw9mT9c", + "colab_type": "code", + "colab": {} + }, + "source": [ + "class AverageMeter(object):\n", + " \"\"\"Computes and stores the average and current value\"\"\"\n", + " def __init__(self, name, fmt=':f'):\n", + " self.name = name\n", + " self.fmt = fmt\n", + " self.reset()\n", + "\n", + " def reset(self):\n", + " self.val = 0\n", + " self.avg = 0\n", + " self.sum = 0\n", + " self.count = 0\n", + "\n", + " def update(self, val, n=1):\n", + " self.val = val\n", + " self.sum += val * n\n", + " self.count += n\n", + " self.avg = self.sum / self.count\n", + "\n", + " def __str__(self):\n", + " fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'\n", + " return fmtstr.format(**self.__dict__)\n", + "\n", + "\n", + "def accuracy(output, target, topk=(1,)):\n", + " \"\"\"Computes the accuracy over the k top predictions for the specified values of k\"\"\"\n", + " with torch.no_grad():\n", + " maxk = max(topk)\n", + " batch_size = target.size(0)\n", + "\n", + " _, pred = output.topk(maxk, 1, True, True)\n", + " pred = pred.t()\n", + " correct = pred.eq(target.view(1, -1).expand_as(pred))\n", + "\n", + " res = []\n", + " for k in topk:\n", + " correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)\n", + " res.append(correct_k.mul_(100.0 / batch_size))\n", + " return res\n", + "\n", + "\n", + "def evaluate(model, criterion, data_loader, neval_batches):\n", + " model.eval()\n", + " top1 = AverageMeter('Acc@1', ':6.2f')\n", + " top5 = AverageMeter('Acc@5', ':6.2f')\n", + " cnt = 0\n", + " with torch.no_grad():\n", + " for image, target in data_loader:\n", + " output = model(image)\n", + " loss = criterion(output, target)\n", + " cnt += 1\n", + " acc1, acc5 = accuracy(output, target, topk=(1, 5))\n", + " print('.', end = '')\n", + " top1.update(acc1[0], image.size(0))\n", + " top5.update(acc5[0], image.size(0))\n", + " if cnt >= neval_batches:\n", + " return top1, top5\n", + "\n", + " return top1, top5\n", + "\n", + "def load_model(model_file):\n", + " model = MobileNetV2()\n", + " state_dict = torch.load(model_file)\n", + " model.load_state_dict(state_dict)\n", + " model.to('cpu')\n", + " return model\n", + "\n", + "def print_size_of_model(model):\n", + " torch.save(model.state_dict(), \"temp.p\")\n", + " print('Size (MB):', os.path.getsize(\"temp.p\")/1e6)\n", + " os.remove('temp.p')" + ], + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nybyd3cap7NC", + "colab_type": "text" + }, + "source": [ + "3. Define dataset and data loaders\n", + "----------------------------------\n", + "\n", + "As our last major setup step, we define our dataloaders for our training and testing set.\n", + "\n", + "\n", + "The specific dataset we've created for this tutorial contains just 1000 images from the ImageNet data, one from\n", + "each class (this dataset, at just over 250 MB, is small enough that it can be downloaded\n", + "relatively easily). 
The URL for this custom dataset is:\n",
+        "\n",
+        "    https://s3.amazonaws.com/pytorch-tutorial-assets/imagenet_1k.zip\n",
+        "\n",
+        "If you are not on Google Colab, you can download this data and move it to the right place using\n",
+        "`these lines `_\n",
+        "from the `Makefile `_.\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "AU-068bDmT9i",
+        "colab_type": "text"
+      },
+      "source": [
+        "\n",
+        "To run the code in this tutorial using the entire ImageNet dataset, on the other hand, you could download\n",
+        "the data using ``torchvision`` following the instructions\n",
+        "`here `_, though the full dataset might not be publicly available. "
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "p-KGyInjXnT1",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "import requests\n",
+        "import zipfile\n",
+        "\n",
+        "url = 'https://s3.amazonaws.com/pytorch-tutorial-assets/imagenet_1k.zip'\n",
+        "r = requests.get(url, allow_redirects=True)\n",
+        "\n",
+        "open('imagenet_1k.zip', 'wb').write(r.content)\n",
+        "\n",
+        "with zipfile.ZipFile('/content/imagenet_1k.zip', 'r') as zip_ref:\n",
+        "    zip_ref.extractall('./data')"
+      ],
+      "execution_count": 5,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "GTo-JBjVmT9j",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "def prepare_data_loaders(data_path):\n",
+        "\n",
+        "    traindir = os.path.join(data_path, 'train')\n",
+        "    valdir = os.path.join(data_path, 'val')\n",
+        "    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],\n",
+        "                                     std=[0.229, 0.224, 0.225])\n",
+        "\n",
+        "    dataset = torchvision.datasets.ImageFolder(\n",
+        "        traindir,\n",
+        "        transforms.Compose([\n",
+        "            transforms.RandomResizedCrop(224),\n",
+        "            transforms.RandomHorizontalFlip(),\n",
+        "            transforms.ToTensor(),\n",
+        "            normalize,\n",
+        "        ]))\n",
+        "\n",
+        "    dataset_test = torchvision.datasets.ImageFolder(\n",
+        "        valdir,\n",
+        "        transforms.Compose([\n",
+        "            transforms.Resize(256),\n",
+        "            transforms.CenterCrop(224),\n",
+        "            transforms.ToTensor(),\n",
+        "            normalize,\n",
+        "        ]))\n",
+        "\n",
+        "    train_sampler = torch.utils.data.RandomSampler(dataset)\n",
+        "    test_sampler = torch.utils.data.SequentialSampler(dataset_test)\n",
+        "\n",
+        "    data_loader = torch.utils.data.DataLoader(\n",
+        "        dataset, batch_size=train_batch_size,\n",
+        "        sampler=train_sampler)\n",
+        "\n",
+        "    data_loader_test = torch.utils.data.DataLoader(\n",
+        "        dataset_test, batch_size=eval_batch_size,\n",
+        "        sampler=test_sampler)\n",
+        "\n",
+        "    return data_loader, data_loader_test"
+      ],
+      "execution_count": 6,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "nZ1rbOa6mT9p",
+        "colab_type": "text"
+      },
+      "source": [
+        "Next, we'll load in the pre-trained MobileNetV2 model. 
We provide the URL used by ``torchvision`` to download the pre-trained weights\n",
+        "`here `_.\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "3OmoorWi0zVJ",
+        "colab_type": "code",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 34
+        },
+        "outputId": "76174591-33d8-4824-8741-9016a00b4d6d"
+      },
+      "source": [
+        "url = 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth'\n",
+        "r = requests.get(url, allow_redirects=True)\n",
+        "open('./data/mobilenet_pretrained_float.pth', 'wb').write(r.content)"
+      ],
+      "execution_count": 7,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "14212972"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 7
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "2-SFR69KmT9q",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "data_path = 'data/imagenet_1k'\n",
+        "saved_model_dir = 'data/'\n",
+        "float_model_file = 'mobilenet_pretrained_float.pth'\n",
+        "scripted_float_model_file = 'mobilenet_quantization_scripted.pth'\n",
+        "scripted_quantized_model_file = 'mobilenet_quantization_scripted_quantized.pth'\n",
+        "\n",
+        "train_batch_size = 30\n",
+        "eval_batch_size = 30\n",
+        "\n",
+        "data_loader, data_loader_test = prepare_data_loaders(data_path)\n",
+        "criterion = nn.CrossEntropyLoss()\n",
+        "float_model = load_model(saved_model_dir + float_model_file).to('cpu')"
+      ],
+      "execution_count": 8,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "lR5e2h7WmT90",
+        "colab_type": "text"
+      },
+      "source": [
+        "Next, we'll \"fuse modules\"; this can make the model faster by saving on memory access\n",
+        "while also improving numerical accuracy. While this can be used with any model, this is\n",
+        "especially common with quantized models.\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "jrzTSHWLmT91",
+        "colab_type": "code",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 496
+        },
+        "outputId": "31ddd15e-fc30-4d40-e0a7-9887cd5e5029"
+      },
+      "source": [
+        "print('\\n Inverted Residual Block: Before fusion \\n\\n', float_model.features[1].conv)\n",
+        "float_model.eval()\n",
+        "\n",
+        "# Fuses modules\n",
+        "float_model.fuse_model()\n",
+        "\n",
+        "# Note fusion of Conv+BN+Relu and Conv+Relu\n",
+        "print('\\n Inverted Residual Block: After fusion\\n\\n',float_model.features[1].conv)"
+      ],
+      "execution_count": 9,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "\n",
+            " Inverted Residual Block: Before fusion \n",
+            "\n",
+            " Sequential(\n",
+            "  (0): ConvBNReLU(\n",
+            "    (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)\n",
+            "    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+            "    (2): ReLU()\n",
+            "  )\n",
+            "  (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+            "  (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+            ")\n",
+            "\n",
+            " Inverted Residual Block: After fusion\n",
+            "\n",
+            " Sequential(\n",
+            "  (0): ConvBNReLU(\n",
+            "    (0): ConvReLU2d(\n",
+            "      (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32)\n",
+            "      (1): ReLU()\n",
+            "    )\n",
+            "    (1): Identity()\n",
+            "    (2): Identity()\n",
+            "  )\n",
+            "  (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1))\n",
+            "  (2): Identity()\n",
+            ")\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "GL4sKU5nmT-D",
+        "colab_type": "text"
+      },
+      "source": [
+        "Finally, to get a \"baseline\" accuracy, let's see the accuracy of our un-quantized model\n",
+        "with fused modules.\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "F9pqyGX9mT-G",
+        "colab_type": "code",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 70
+        },
+        "outputId": "4a09ef24-5357-4413-f147-80e662221558"
+      },
+      "source": [
+        "num_eval_batches = 10\n",
+        "\n",
+        "print(\"Size of baseline model\")\n",
+        "print_size_of_model(float_model)\n",
+        "\n",
+        "top1, top5 = evaluate(float_model, criterion, data_loader_test, neval_batches=num_eval_batches)\n",
+        "print('Evaluation accuracy on %d images, %2.2f'%(num_eval_batches * eval_batch_size, top1.avg))\n",
+        "torch.jit.save(torch.jit.script(float_model), saved_model_dir + scripted_float_model_file)"
+      ],
+      "execution_count": 10,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "Size of baseline model\n",
+            "Size (MB): 13.998515\n",
+            "..........Evaluation accuracy on 300 images, 78.00\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "l9IMIpdPmT-L",
+        "colab_type": "text"
+      },
+      "source": [
+        "We see 78% accuracy on 300 images, a solid baseline for ImageNet,\n",
+        "especially considering our model is just 14.0 MB.\n",
+        "\n",
+        "This will be our baseline to compare to. Next, let's try different quantization methods.\n",
+        "\n",
+        "4. Post-training static quantization\n",
+        "------------------------------------\n",
+        "\n",
+        "Post-training static quantization involves not just converting the weights from float to int,\n",
+        "as in dynamic quantization, but also performing the additional step of first feeding batches\n",
+        "of data through the network and computing the resulting distributions of the different activations\n",
+        "(specifically, this is done by inserting `observer` modules at different points that record this\n",
+        "data). These distributions are then used to determine how specifically the different activations\n",
+        "should be quantized at inference time (a simple technique would be to divide the entire range\n",
+        "of activations into 256 levels, but we support more sophisticated methods as well). 
Importantly,\n", + "this additional step allows us to pass quantized values between operations instead of converting these\n", + "values to floats - and then back to ints - between every operation, resulting in a significant speed-up.\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "vr3kkFyLmT-N", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 818 + }, + "outputId": "fc78613d-21f1-4f2b-d3e4-addd2557d88c" + }, + "source": [ + "num_calibration_batches = 10\n", + "\n", + "myModel = load_model(saved_model_dir + float_model_file).to('cpu')\n", + "myModel.eval()\n", + "\n", + "# Fuse Conv, bn and relu\n", + "myModel.fuse_model()\n", + "\n", + "# Specify quantization configuration\n", + "# Start with simple min/max range estimation and per-tensor quantization of weights\n", + "myModel.qconfig = torch.quantization.default_qconfig\n", + "print(myModel.qconfig)\n", + "torch.quantization.prepare(myModel, inplace=True)\n", + "\n", + "# Calibrate first\n", + "print('Post Training Quantization Prepare: Inserting Observers')\n", + "print('\\n Inverted Residual Block:After observer insertion \\n\\n', myModel.features[1].conv)\n", + "\n", + "# Calibrate with the training set\n", + "evaluate(myModel, criterion, data_loader, neval_batches=num_calibration_batches)\n", + "print('Post Training Quantization: Calibration done')\n", + "\n", + "# Convert to quantized model\n", + "torch.quantization.convert(myModel, inplace=True)\n", + "print('Post Training Quantization: Convert done')\n", + "print('\\n Inverted Residual Block: After fusion and quantization, note fused modules: \\n\\n',myModel.features[1].conv)\n", + "\n", + "print(\"Size of model after quantization\")\n", + "print_size_of_model(myModel)\n", + "\n", + "top1, top5 = evaluate(myModel, criterion, data_loader_test, neval_batches=num_eval_batches)\n", + "print('Evaluation accuracy on %d images, %2.2f'%(num_eval_batches * eval_batch_size, top1.avg))" + ], + "execution_count": 11, + "outputs": [ + { + "output_type": "stream", + "text": [ + "QConfig(activation=functools.partial(, reduce_range=True), weight=functools.partial(, dtype=torch.qint8, qscheme=torch.per_tensor_symmetric))\n", + "Post Training Quantization Prepare: Inserting Observers\n", + "\n", + " Inverted Residual Block:After observer insertion \n", + "\n", + " Sequential(\n", + " (0): ConvBNReLU(\n", + " (0): ConvReLU2d(\n", + " (0): Conv2d(\n", + " 32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32\n", + " (activation_post_process): MinMaxObserver(min_val=tensor([]), max_val=tensor([]))\n", + " )\n", + " (1): ReLU(\n", + " (activation_post_process): MinMaxObserver(min_val=tensor([]), max_val=tensor([]))\n", + " )\n", + " )\n", + " (1): Identity()\n", + " (2): Identity()\n", + " )\n", + " (1): Conv2d(\n", + " 32, 16, kernel_size=(1, 1), stride=(1, 1)\n", + " (activation_post_process): MinMaxObserver(min_val=tensor([]), max_val=tensor([]))\n", + " )\n", + " (2): Identity()\n", + ")\n", + "..........Post Training Quantization: Calibration done\n", + "Post Training Quantization: Convert done\n", + "\n", + " Inverted Residual Block: After fusion and quantization, note fused modules: \n", + "\n", + " Sequential(\n", + " (0): ConvBNReLU(\n", + " (0): QuantizedConvReLU2d(32, 32, kernel_size=(3, 3), stride=(1, 1), scale=0.1306864470243454, zero_point=0, padding=(1, 1), groups=32)\n", + " (1): Identity()\n", + " (2): Identity()\n", + " )\n", + " (1): QuantizedConv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), 
scale=0.16245220601558685, zero_point=65)\n",
+            "  (2): Identity()\n",
+            ")\n",
+            "Size of model after quantization\n",
+            "Size (MB): 3.629327\n"
+          ],
+          "name": "stdout"
+        },
+        {
+          "output_type": "stream",
+          "text": [
+            "/usr/local/lib/python3.6/dist-packages/torch/quantization/observer.py:136: UserWarning: must run observer before calling calculate_qparams. Returning default scale and zero point \n",
+            "  Returning default scale and zero point \"\n"
+          ],
+          "name": "stderr"
+        },
+        {
+          "output_type": "stream",
+          "text": [
+            "..........Evaluation accuracy on 300 images, 67.67\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "9UvwzZRamT-a",
+        "colab_type": "text"
+      },
+      "source": [
+        "For this quantized model, we see a significantly lower accuracy of just ~68% on these same 300\n",
+        "images. Nevertheless, we did reduce the size of our model down to just over 3.6 MB, almost a 4x\n",
+        "decrease.\n",
+        "\n",
+        "In addition, we can significantly improve on the accuracy simply by using a different\n",
+        "quantization configuration. We repeat the same exercise with the recommended configuration for\n",
+        "quantizing for x86 architectures. This configuration does the following:\n",
+        "\n",
+        "- Quantizes weights on a per-channel basis\n",
+        "- Uses a histogram observer that collects a histogram of activations and then picks\n",
+        "  quantization parameters in an optimal manner.\n",
+        "\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "0I6WUUgQmT-b",
+        "colab_type": "code",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 107
+        },
+        "outputId": "89fb543e-2fd8-4a23-abd3-7bf4d00d00d6"
+      },
+      "source": [
+        "per_channel_quantized_model = load_model(saved_model_dir + float_model_file)\n",
+        "per_channel_quantized_model.eval()\n",
+        "per_channel_quantized_model.fuse_model()\n",
+        "per_channel_quantized_model.qconfig = torch.quantization.get_default_qconfig('fbgemm')\n",
+        "print(per_channel_quantized_model.qconfig)\n",
+        "\n",
+        "torch.quantization.prepare(per_channel_quantized_model, inplace=True)\n",
+        "evaluate(per_channel_quantized_model,criterion, data_loader, num_calibration_batches)\n",
+        "torch.quantization.convert(per_channel_quantized_model, inplace=True)\n",
+        "top1, top5 = evaluate(per_channel_quantized_model, criterion, data_loader_test, neval_batches=num_eval_batches)\n",
+        "print('Evaluation accuracy on %d images, %2.2f'%(num_eval_batches * eval_batch_size, top1.avg))\n",
+        "torch.jit.save(torch.jit.script(per_channel_quantized_model), saved_model_dir + scripted_quantized_model_file)"
+      ],
+      "execution_count": 12,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "QConfig(activation=functools.partial(, reduce_range=True), weight=functools.partial(, dtype=torch.qint8, qscheme=torch.per_channel_symmetric))\n",
+            ".........."
+          ],
+          "name": "stdout"
+        },
+        {
+          "output_type": "stream",
+          "text": [
+            "/usr/local/lib/python3.6/dist-packages/torch/quantization/observer.py:877: UserWarning: must run observer before calling calculate_qparams. 
Returning default scale and zero point \n", + " Returning default scale and zero point \"\n" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": [ + "..........Evaluation accuracy on 300 images, 76.00\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "z1ussBHjmT-g", + "colab_type": "text" + }, + "source": [ + "Changing just this quantization configuration method resulted in an increase\n", + "of the accuracy to over 76%! Still, this is 1-2% worse than the baseline of 78% achieved above.\n", + "So lets try quantization aware training.\n", + "\n", + "5. Quantization-aware training\n", + "------------------------------\n", + "\n", + "Quantization-aware training (QAT) is the quantization method that typically results in the highest accuracy.\n", + "With QAT, all weights and activations are “fake quantized” during both the forward and backward passes of\n", + "training: that is, float values are rounded to mimic int8 values, but all computations are still done with\n", + "floating point numbers. Thus, all the weight adjustments during training are made while “aware” of the fact\n", + "that the model will ultimately be quantized; after quantizing, therefore, this method will usually yield\n", + "higher accuracy than either dynamic quantization or post-training static quantization.\n", + "\n", + "The overall workflow for actually performing QAT is very similar to before:\n", + "\n", + "- We can use the same model as before: there is no additional preparation needed for quantization-aware\n", + " training.\n", + "- We need to use a ``qconfig`` specifying what kind of fake-quantization is to be inserted after weights\n", + " and activations, instead of specifying observers\n", + "\n", + "We first define a training function:\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "APtX9dtKmT-h", + "colab_type": "code", + "colab": {} + }, + "source": [ + "def train_one_epoch(model, criterion, optimizer, data_loader, device, ntrain_batches):\n", + " model.train()\n", + " top1 = AverageMeter('Acc@1', ':6.2f')\n", + " top5 = AverageMeter('Acc@5', ':6.2f')\n", + " avgloss = AverageMeter('Loss', '1.5f')\n", + "\n", + " cnt = 0\n", + " for image, target in data_loader:\n", + " start_time = time.time()\n", + " print('.', end = '')\n", + " cnt += 1\n", + " image, target = image.to(device), target.to(device)\n", + " output = model(image)\n", + " loss = criterion(output, target)\n", + " optimizer.zero_grad()\n", + " loss.backward()\n", + " optimizer.step()\n", + " acc1, acc5 = accuracy(output, target, topk=(1, 5))\n", + " top1.update(acc1[0], image.size(0))\n", + " top5.update(acc5[0], image.size(0))\n", + " avgloss.update(loss, image.size(0))\n", + " if cnt >= ntrain_batches:\n", + " print('Loss', avgloss.avg)\n", + "\n", + " print('Training: * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'\n", + " .format(top1=top1, top5=top5))\n", + " return\n", + "\n", + " print('Full imagenet train set: * Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f}'\n", + " .format(top1=top1, top5=top5))\n", + " return" + ], + "execution_count": 13, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eygHNI-VmT-o", + "colab_type": "text" + }, + "source": [ + "We fuse modules as before\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "OhT0liNqmT-p", + "colab_type": "code", + "colab": {} + }, + "source": [ + "qat_model = load_model(saved_model_dir + float_model_file)\n", + "qat_model.fuse_model()\n", + "\n", + 
"optimizer = torch.optim.SGD(qat_model.parameters(), lr = 0.0001)\n", + "qat_model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')" + ], + "execution_count": 14, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GP7eBfldmT-v", + "colab_type": "text" + }, + "source": [ + "Finally, ``prepare_qat`` performs the \"fake quantization\", preparing the model for quantization-aware\n", + "training\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "9_XnVNcCmT-w", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 605 + }, + "outputId": "0e5d8790-b77a-4c43-98a8-b09bb720c88e" + }, + "source": [ + "torch.quantization.prepare_qat(qat_model, inplace=True)\n", + "print('Inverted Residual Block: After preparation for QAT, note fake-quantization modules \\n',qat_model.features[1].conv)" + ], + "execution_count": 15, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Inverted Residual Block: After preparation for QAT, note fake-quantization modules \n", + " Sequential(\n", + " (0): ConvBNReLU(\n", + " (0): ConvBnReLU2d(\n", + " 32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False\n", + " (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activation_post_process): FakeQuantize(\n", + " fake_quant_enabled=tensor([1], dtype=torch.uint8), observer_enabled=tensor([1], dtype=torch.uint8), scale=tensor([1.]), zero_point=tensor([0])\n", + " (activation_post_process): MovingAverageMinMaxObserver(min_val=tensor([]), max_val=tensor([]))\n", + " )\n", + " (weight_fake_quant): FakeQuantize(\n", + " fake_quant_enabled=tensor([1], dtype=torch.uint8), observer_enabled=tensor([1], dtype=torch.uint8), scale=tensor([1.]), zero_point=tensor([0])\n", + " (activation_post_process): MovingAveragePerChannelMinMaxObserver(min_val=tensor([]), max_val=tensor([]))\n", + " )\n", + " )\n", + " (1): Identity()\n", + " (2): Identity()\n", + " )\n", + " (1): ConvBn2d(\n", + " 32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False\n", + " (bn): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activation_post_process): FakeQuantize(\n", + " fake_quant_enabled=tensor([1], dtype=torch.uint8), observer_enabled=tensor([1], dtype=torch.uint8), scale=tensor([1.]), zero_point=tensor([0])\n", + " (activation_post_process): MovingAverageMinMaxObserver(min_val=tensor([]), max_val=tensor([]))\n", + " )\n", + " (weight_fake_quant): FakeQuantize(\n", + " fake_quant_enabled=tensor([1], dtype=torch.uint8), observer_enabled=tensor([1], dtype=torch.uint8), scale=tensor([1.]), zero_point=tensor([0])\n", + " (activation_post_process): MovingAveragePerChannelMinMaxObserver(min_val=tensor([]), max_val=tensor([]))\n", + " )\n", + " )\n", + " (2): Identity()\n", + ")\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lZRffh5YmT-5", + "colab_type": "text" + }, + "source": [ + "Training a quantized model with high accuracy requires accurate modeling of numerics at\n", + "inference. 
For quantization aware training, therefore, we modify the training loop by:\n", + "\n", + "- Switch batch norm to use running mean and variance towards the end of training to better\n", + " match inference numerics.\n", + "- We also freeze the quantizer parameters (scale and zero-point) and fine tune the weights.\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "8LMhmh2GmT-6", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 356 + }, + "outputId": "b891db1d-9f05-4a63-ce73-ec20a715f0a4" + }, + "source": [ + "num_train_batches = 20\n", + "\n", + "# Train and check accuracy after each epoch\n", + "for nepoch in range(8):\n", + " train_one_epoch(qat_model, criterion, optimizer, data_loader, torch.device('cpu'), num_train_batches)\n", + " if nepoch > 3:\n", + " # Freeze quantizer parameters\n", + " qat_model.apply(torch.quantization.disable_observer)\n", + " if nepoch > 2:\n", + " # Freeze batch norm mean and variance estimates\n", + " qat_model.apply(torch.nn.intrinsic.qat.freeze_bn_stats)\n", + "\n", + " # Check the accuracy after each epoch\n", + " quantized_model = torch.quantization.convert(qat_model.eval(), inplace=False)\n", + " quantized_model.eval()\n", + " top1, top5 = evaluate(quantized_model,criterion, data_loader_test, neval_batches=num_eval_batches)\n", + " print('Epoch %d :Evaluation accuracy on %d images, %2.2f'%(nepoch, num_eval_batches * eval_batch_size, top1.avg))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "....................Loss tensor(2.0058, grad_fn=)\n", + "Training: * Acc@1 54.500 Acc@5 76.833\n" + ], + "name": "stdout" + }, + { + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.6/dist-packages/torch/quantization/observer.py:136: UserWarning: must run observer before calling calculate_qparams. Returning default scale and zero point \n", + " Returning default scale and zero point \"\n" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": [ + "..........Epoch 0 :Evaluation accuracy on 300 images, 75.00\n", + "....................Loss tensor(2.1846, grad_fn=)\n", + "Training: * Acc@1 52.833 Acc@5 74.833\n", + "..........Epoch 1 :Evaluation accuracy on 300 images, 73.67\n", + "....................Loss tensor(1.9846, grad_fn=)\n", + "Training: * Acc@1 54.667 Acc@5 79.167\n", + "..........Epoch 2 :Evaluation accuracy on 300 images, 75.67\n", + "....................Loss tensor(2.0301, grad_fn=)\n", + "Training: * Acc@1 52.500 Acc@5 76.667\n", + "..........Epoch 3 :Evaluation accuracy on 300 images, 75.67\n", + "....................Loss tensor(1.8863, grad_fn=)\n", + "Training: * Acc@1 59.000 Acc@5 80.500\n", + "..........Epoch 4 :Evaluation accuracy on 300 images, 76.00\n", + "............" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uA-s2UhHmT--", + "colab_type": "text" + }, + "source": [ + "Here, we just perform quantization-aware training for a small number of epochs. 
Nevertheless,\n", + "quantization-aware training yields an accuracy of over 71% on the entire imagenet dataset,\n", + "which is close to the floating point accuracy of 71.9%.\n", + "\n", + "More on quantization-aware training:\n", + "\n", + "- QAT is a super-set of post training quant techniques that allows for more debugging.\n", + " For example, we can analyze if the accuracy of the model is limited by weight or activation\n", + " quantization.\n", + "- We can also simulate the accuracy of a quantized model in floating point since\n", + " we are using fake-quantization to model the numerics of actual quantized arithmetic.\n", + "- We can mimic post training quantization easily too.\n", + "\n", + "Speedup from quantization\n", + "^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "\n", + "Finally, let's confirm something we alluded to above: do our quantized models actually perform inference\n", + "faster? Let's test:\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "uEkFeubKmT-_", + "colab_type": "code", + "colab": {} + }, + "source": [ + "def run_benchmark(model_file, img_loader):\n", + " elapsed = 0\n", + " model = torch.jit.load(model_file)\n", + " model.eval()\n", + " num_batches = 5\n", + " # Run the scripted model on a few batches of images\n", + " for i, (images, target) in enumerate(img_loader):\n", + " if i < num_batches:\n", + " start = time.time()\n", + " output = model(images)\n", + " end = time.time()\n", + " elapsed = elapsed + (end-start)\n", + " else:\n", + " break\n", + " num_images = images.size()[0] * num_batches\n", + "\n", + " print('Elapsed time: %3.0f ms' % (elapsed/num_images*1000))\n", + " return elapsed\n", + "\n", + "run_benchmark(saved_model_dir + scripted_float_model_file, data_loader_test)\n", + "\n", + "run_benchmark(saved_model_dir + scripted_quantized_model_file, data_loader_test)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LMFrLrSwmT_I", + "colab_type": "text" + }, + "source": [ + "Running this locally on a MacBook pro yielded 61 ms for the regular model, and\n", + "just 20 ms for the quantized model, illustrating the typical 2-4x speedup\n", + "we see for quantized models compared to floating point ones.\n", + "\n", + "Conclusion\n", + "----------\n", + "\n", + "In this tutorial, we showed two quantization methods - post-training static quantization,\n", + "and quantization-aware training - describing what they do \"under the hood\" and how to use\n", + "them in PyTorch.\n", + "\n", + "Thanks for reading! As always, we welcome any feedback, so please create an issue\n", + "`here `_ if you have any.\n", + "\n" + ] + } + ] +} \ No newline at end of file