From fa7cff744b5a302a0f2beb5aabcff23711a34698 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Wed, 12 Aug 2020 11:29:44 -0700 Subject: [PATCH 1/6] Fix typo (#1118) In PyTorch tutorial, `torch` should be installed rather than `torchaudio` --- recipes_source/recipes/what_is_state_dict.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes_source/recipes/what_is_state_dict.py b/recipes_source/recipes/what_is_state_dict.py index 8e718e9071e..5e7f259fd7b 100644 --- a/recipes_source/recipes/what_is_state_dict.py +++ b/recipes_source/recipes/what_is_state_dict.py @@ -28,7 +28,7 @@ :: - pip install torchaudio + pip install torch """ From f056cf9de0ed24d2f31a661813b68e11b46ecaca Mon Sep 17 00:00:00 2001 From: Parth Patel Date: Fri, 21 Aug 2020 00:07:41 +0200 Subject: [PATCH 2/6] imagenet_1k and mobilenet_pretrained_float.pth are included in cell --- static_quantization_tutorial.ipynb | 41 ++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 static_quantization_tutorial.ipynb diff --git a/static_quantization_tutorial.ipynb b/static_quantization_tutorial.ipynb new file mode 100644 index 00000000000..866bd6af38c --- /dev/null +++ b/static_quantization_tutorial.ipynb @@ -0,0 +1,41 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Untitled0.ipynb", + "provenance": [], + "authorship_tag": "ABX9TyOOi/to8wgIrTQMTwi8uZZ3", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "2Q81eF0E5nPd", + "colab_type": "code", + "colab": {} + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file From 76593fe1e8f32a3a7bee01571b0156c50ba4b0dd Mon Sep 17 00:00:00 2001 From: Parth Patel Date: Fri, 21 Aug 2020 00:27:13 +0200 Subject: [PATCH 3/6] moved to right place --- .../static_quantization_tutorial.ipynb | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename static_quantization_tutorial.ipynb => advanced_source/static_quantization_tutorial.ipynb (100%) diff --git a/static_quantization_tutorial.ipynb b/advanced_source/static_quantization_tutorial.ipynb similarity index 100% rename from static_quantization_tutorial.ipynb rename to advanced_source/static_quantization_tutorial.ipynb From a8107b5f4702d881edf720dcabcd8aca5dc3ae06 Mon Sep 17 00:00:00 2001 From: Parth Patel Date: Fri, 21 Aug 2020 00:28:19 +0200 Subject: [PATCH 4/6] Revert "moved to right place" This reverts commit 76593fe1e8f32a3a7bee01571b0156c50ba4b0dd. --- ...ntization_tutorial.ipynb => static_quantization_tutorial.ipynb | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename advanced_source/static_quantization_tutorial.ipynb => static_quantization_tutorial.ipynb (100%) diff --git a/advanced_source/static_quantization_tutorial.ipynb b/static_quantization_tutorial.ipynb similarity index 100% rename from advanced_source/static_quantization_tutorial.ipynb rename to static_quantization_tutorial.ipynb From 3125d668a9e3beaeb99004ce289ffdf4019cbd9b Mon Sep 17 00:00:00 2001 From: Parth Patel Date: Fri, 21 Aug 2020 00:28:30 +0200 Subject: [PATCH 5/6] Revert "imagenet_1k and mobilenet_pretrained_float.pth are included in cell" This reverts commit f056cf9de0ed24d2f31a661813b68e11b46ecaca. 
--- static_quantization_tutorial.ipynb | 41 ------------------------------ 1 file changed, 41 deletions(-) delete mode 100644 static_quantization_tutorial.ipynb diff --git a/static_quantization_tutorial.ipynb b/static_quantization_tutorial.ipynb deleted file mode 100644 index 866bd6af38c..00000000000 --- a/static_quantization_tutorial.ipynb +++ /dev/null @@ -1,41 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "Untitled0.ipynb", - "provenance": [], - "authorship_tag": "ABX9TyOOi/to8wgIrTQMTwi8uZZ3", - "include_colab_link": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "2Q81eF0E5nPd", - "colab_type": "code", - "colab": {} - }, - "source": [ - "" - ], - "execution_count": null, - "outputs": [] - } - ] -} \ No newline at end of file From cfa12324f9bcae8f3aa0542360a9ac54fff6f38f Mon Sep 17 00:00:00 2001 From: Parth Patel Date: Sun, 23 Aug 2020 21:53:57 +0200 Subject: [PATCH 6/6] Create static_quantization_tutorial.ipynb Google Colab ready --- .../static_quantization_tutorial.ipynb | 1269 +++++++++++++++++ 1 file changed, 1269 insertions(+) create mode 100644 advanced_source/static_quantization_tutorial.ipynb diff --git a/advanced_source/static_quantization_tutorial.ipynb b/advanced_source/static_quantization_tutorial.ipynb new file mode 100644 index 00000000000..c5787ad13ec --- /dev/null +++ b/advanced_source/static_quantization_tutorial.ipynb @@ -0,0 +1,1269 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + }, + "colab": { + "name": "Copy of static_quantization_tutorial.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "code", + "metadata": { + "id": "S-IqlZHXmT9F", + "colab_type": "code", + "colab": {} + }, + "source": [ + "%matplotlib inline" + ], + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GwgB_PutmT9N", + "colab_type": "text" + }, + "source": [ + "\n", + "(beta) Static Quantization with Eager Mode in PyTorch\n", + "=========================================================\n", + "\n", + "**Author**: `Raghuraman Krishnamoorthi `_\n", + "\n", + "**Edited by**: `Parth Patel `_\n", + "\n", + "This tutorial shows how to do post-training static quantization, as well as illustrating\n", + "two more advanced techniques - per-channel quantization and quantization-aware training -\n", + "to further improve the model's accuracy. Note that quantization is currently only supported\n", + "for CPUs, so we will not be utilizing GPUs / CUDA in this tutorial.\n", + "\n", + "By the end of this tutorial, you will see how quantization in PyTorch can result in\n", + "significant decreases in model size while increasing speed. 
Furthermore, you'll see how\n",
+        "to easily apply some advanced quantization techniques shown\n",
+        "`here `_ so that your quantized models take much less\n",
+        "of an accuracy hit than they would otherwise.\n",
+        "\n",
+        "Warning: we use a lot of boilerplate code from other PyTorch repos to, for example,\n",
+        "define the ``MobileNetV2`` model architecture, define data loaders, and so on. We of course\n",
+        "encourage you to read it; but if you want to get to the quantization features, feel free\n",
+        "to skip to the \"4. Post-training static quantization\" section.\n",
+        "\n",
+        "We'll start by doing the necessary imports:\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "TVO3o5uLmT9P",
+        "colab_type": "code",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 34
+        },
+        "outputId": "8ad1c9e8-5e88-4d3f-9b29-d0c1ccbde953"
+      },
+      "source": [
+        "import numpy as np\n",
+        "import torch\n",
+        "import torch.nn as nn\n",
+        "import torchvision\n",
+        "from torch.utils.data import DataLoader\n",
+        "from torchvision import datasets\n",
+        "import torchvision.transforms as transforms\n",
+        "import os\n",
+        "import time\n",
+        "import sys\n",
+        "import torch.quantization\n",
+        "\n",
+        "# Setup warnings\n",
+        "import warnings\n",
+        "warnings.filterwarnings(\n",
+        "    action='ignore',\n",
+        "    category=DeprecationWarning,\n",
+        "    module=r'.*'\n",
+        ")\n",
+        "warnings.filterwarnings(\n",
+        "    action='default',\n",
+        "    module=r'torch.quantization'\n",
+        ")\n",
+        "\n",
+        "# Specify random seed for repeatable results\n",
+        "torch.manual_seed(191009)"
+      ],
+      "execution_count": 2,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              ""
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 2
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "9yGOQeD6mT9V",
+        "colab_type": "text"
+      },
+      "source": [
+        "1. 
Model architecture\n", + "---------------------\n", + "\n", + "We first define the MobileNetV2 model architecture, with several notable modifications\n", + "to enable quantization:\n", + "\n", + "- Replacing addition with ``nn.quantized.FloatFunctional``\n", + "- Insert ``QuantStub`` and ``DeQuantStub`` at the beginning and end of the network.\n", + "- Replace ReLU6 with ReLU\n", + "\n", + "Note: this code is taken from\n", + "`here `_.\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "EZpdZtHwmT9W", + "colab_type": "code", + "colab": {} + }, + "source": [ + "from torch.quantization import QuantStub, DeQuantStub\n", + "\n", + "def _make_divisible(v, divisor, min_value=None):\n", + " \"\"\"\n", + " This function is taken from the original tf repo.\n", + " It ensures that all layers have a channel number that is divisible by 8\n", + " It can be seen here:\n", + " https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py\n", + " :param v:\n", + " :param divisor:\n", + " :param min_value:\n", + " :return:\n", + " \"\"\"\n", + " if min_value is None:\n", + " min_value = divisor\n", + " new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)\n", + " # Make sure that round down does not go down by more than 10%.\n", + " if new_v < 0.9 * v:\n", + " new_v += divisor\n", + " return new_v\n", + "\n", + "\n", + "class ConvBNReLU(nn.Sequential):\n", + " def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):\n", + " padding = (kernel_size - 1) // 2\n", + " super(ConvBNReLU, self).__init__(\n", + " nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),\n", + " nn.BatchNorm2d(out_planes, momentum=0.1),\n", + " # Replace with ReLU\n", + " nn.ReLU(inplace=False)\n", + " )\n", + "\n", + "\n", + "class InvertedResidual(nn.Module):\n", + " def __init__(self, inp, oup, stride, expand_ratio):\n", + " super(InvertedResidual, self).__init__()\n", + " self.stride = stride\n", + " assert stride in [1, 2]\n", + "\n", + " hidden_dim = int(round(inp * expand_ratio))\n", + " self.use_res_connect = self.stride == 1 and inp == oup\n", + "\n", + " layers = []\n", + " if expand_ratio != 1:\n", + " # pw\n", + " layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))\n", + " layers.extend([\n", + " # dw\n", + " ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),\n", + " # pw-linear\n", + " nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),\n", + " nn.BatchNorm2d(oup, momentum=0.1),\n", + " ])\n", + " self.conv = nn.Sequential(*layers)\n", + " # Replace torch.add with floatfunctional\n", + " self.skip_add = nn.quantized.FloatFunctional()\n", + "\n", + " def forward(self, x):\n", + " if self.use_res_connect:\n", + " return self.skip_add.add(x, self.conv(x))\n", + " else:\n", + " return self.conv(x)\n", + "\n", + "\n", + "class MobileNetV2(nn.Module):\n", + " def __init__(self, num_classes=1000, width_mult=1.0, inverted_residual_setting=None, round_nearest=8):\n", + " \"\"\"\n", + " MobileNet V2 main class\n", + "\n", + " Args:\n", + " num_classes (int): Number of classes\n", + " width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount\n", + " inverted_residual_setting: Network structure\n", + " round_nearest (int): Round the number of channels in each layer to be a multiple of this number\n", + " Set to 1 to turn off rounding\n", + " \"\"\"\n", + " super(MobileNetV2, self).__init__()\n", + " block = InvertedResidual\n", + " input_channel = 32\n", 
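+        "        # Width of the final 1x1 conv before the classifier; like input_channel above, it is scaled by width_mult and rounded via _make_divisible below\n",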
+ " last_channel = 1280\n", + "\n", + " if inverted_residual_setting is None:\n", + " inverted_residual_setting = [\n", + " # t, c, n, s\n", + " [1, 16, 1, 1],\n", + " [6, 24, 2, 2],\n", + " [6, 32, 3, 2],\n", + " [6, 64, 4, 2],\n", + " [6, 96, 3, 1],\n", + " [6, 160, 3, 2],\n", + " [6, 320, 1, 1],\n", + " ]\n", + "\n", + " # only check the first element, assuming user knows t,c,n,s are required\n", + " if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:\n", + " raise ValueError(\"inverted_residual_setting should be non-empty \"\n", + " \"or a 4-element list, got {}\".format(inverted_residual_setting))\n", + "\n", + " # building first layer\n", + " input_channel = _make_divisible(input_channel * width_mult, round_nearest)\n", + " self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)\n", + " features = [ConvBNReLU(3, input_channel, stride=2)]\n", + " # building inverted residual blocks\n", + " for t, c, n, s in inverted_residual_setting:\n", + " output_channel = _make_divisible(c * width_mult, round_nearest)\n", + " for i in range(n):\n", + " stride = s if i == 0 else 1\n", + " features.append(block(input_channel, output_channel, stride, expand_ratio=t))\n", + " input_channel = output_channel\n", + " # building last several layers\n", + " features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1))\n", + " # make it nn.Sequential\n", + " self.features = nn.Sequential(*features)\n", + " self.quant = QuantStub()\n", + " self.dequant = DeQuantStub()\n", + " # building classifier\n", + " self.classifier = nn.Sequential(\n", + " nn.Dropout(0.2),\n", + " nn.Linear(self.last_channel, num_classes),\n", + " )\n", + "\n", + " # weight initialization\n", + " for m in self.modules():\n", + " if isinstance(m, nn.Conv2d):\n", + " nn.init.kaiming_normal_(m.weight, mode='fan_out')\n", + " if m.bias is not None:\n", + " nn.init.zeros_(m.bias)\n", + " elif isinstance(m, nn.BatchNorm2d):\n", + " nn.init.ones_(m.weight)\n", + " nn.init.zeros_(m.bias)\n", + " elif isinstance(m, nn.Linear):\n", + " nn.init.normal_(m.weight, 0, 0.01)\n", + " nn.init.zeros_(m.bias)\n", + "\n", + " def forward(self, x):\n", + "\n", + " x = self.quant(x)\n", + "\n", + " x = self.features(x)\n", + " x = x.mean([2, 3])\n", + " x = self.classifier(x)\n", + " x = self.dequant(x)\n", + " return x\n", + "\n", + " # Fuse Conv+BN and Conv+BN+Relu modules prior to quantization\n", + " # This operation does not change the numerics\n", + " def fuse_model(self):\n", + " for m in self.modules():\n", + " if type(m) == ConvBNReLU:\n", + " torch.quantization.fuse_modules(m, ['0', '1', '2'], inplace=True)\n", + " if type(m) == InvertedResidual:\n", + " for idx in range(len(m.conv)):\n", + " if type(m.conv[idx]) == nn.Conv2d:\n", + " torch.quantization.fuse_modules(m.conv, [str(idx), str(idx + 1)], inplace=True)" + ], + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NQpHqXh0mT9b", + "colab_type": "text" + }, + "source": [ + "2. Helper functions\n", + "-------------------\n", + "\n", + "We next define several helper functions to help with model evaluation. 
These mostly come from\n", + "`here `_.\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "vL1GtBw9mT9c", + "colab_type": "code", + "colab": {} + }, + "source": [ + "class AverageMeter(object):\n", + " \"\"\"Computes and stores the average and current value\"\"\"\n", + " def __init__(self, name, fmt=':f'):\n", + " self.name = name\n", + " self.fmt = fmt\n", + " self.reset()\n", + "\n", + " def reset(self):\n", + " self.val = 0\n", + " self.avg = 0\n", + " self.sum = 0\n", + " self.count = 0\n", + "\n", + " def update(self, val, n=1):\n", + " self.val = val\n", + " self.sum += val * n\n", + " self.count += n\n", + " self.avg = self.sum / self.count\n", + "\n", + " def __str__(self):\n", + " fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'\n", + " return fmtstr.format(**self.__dict__)\n", + "\n", + "\n", + "def accuracy(output, target, topk=(1,)):\n", + " \"\"\"Computes the accuracy over the k top predictions for the specified values of k\"\"\"\n", + " with torch.no_grad():\n", + " maxk = max(topk)\n", + " batch_size = target.size(0)\n", + "\n", + " _, pred = output.topk(maxk, 1, True, True)\n", + " pred = pred.t()\n", + " correct = pred.eq(target.view(1, -1).expand_as(pred))\n", + "\n", + " res = []\n", + " for k in topk:\n", + " correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)\n", + " res.append(correct_k.mul_(100.0 / batch_size))\n", + " return res\n", + "\n", + "\n", + "def evaluate(model, criterion, data_loader, neval_batches):\n", + " model.eval()\n", + " top1 = AverageMeter('Acc@1', ':6.2f')\n", + " top5 = AverageMeter('Acc@5', ':6.2f')\n", + " cnt = 0\n", + " with torch.no_grad():\n", + " for image, target in data_loader:\n", + " output = model(image)\n", + " loss = criterion(output, target)\n", + " cnt += 1\n", + " acc1, acc5 = accuracy(output, target, topk=(1, 5))\n", + " print('.', end = '')\n", + " top1.update(acc1[0], image.size(0))\n", + " top5.update(acc5[0], image.size(0))\n", + " if cnt >= neval_batches:\n", + " return top1, top5\n", + "\n", + " return top1, top5\n", + "\n", + "def load_model(model_file):\n", + " model = MobileNetV2()\n", + " state_dict = torch.load(model_file)\n", + " model.load_state_dict(state_dict)\n", + " model.to('cpu')\n", + " return model\n", + "\n", + "def print_size_of_model(model):\n", + " torch.save(model.state_dict(), \"temp.p\")\n", + " print('Size (MB):', os.path.getsize(\"temp.p\")/1e6)\n", + " os.remove('temp.p')" + ], + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nybyd3cap7NC", + "colab_type": "text" + }, + "source": [ + "3. Define dataset and data loaders\n", + "----------------------------------\n", + "\n", + "As our last major setup step, we define our dataloaders for our training and testing set.\n", + "\n", + "\n", + "The specific dataset we've created for this tutorial contains just 1000 images from the ImageNet data, one from\n", + "each class (this dataset, at just over 250 MB, is small enough that it can be downloaded\n", + "relatively easily). 
The URL for this custom dataset is:\n",
+        "\n",
+        "    https://s3.amazonaws.com/pytorch-tutorial-assets/imagenet_1k.zip\n",
+        "\n",
+        "If you are not on Google Colab, you can download this data and move it to the right place using\n",
+        "`these lines `_\n",
+        "from the `Makefile `_.\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "AU-068bDmT9i",
+        "colab_type": "text"
+      },
+      "source": [
+        "\n",
+        "To run the code in this tutorial using the entire ImageNet dataset, on the other hand, you could download\n",
+        "the data using ``torchvision`` following the instructions\n",
+        "`here `_, though the full dataset might not be publicly available. "
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "p-KGyInjXnT1",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "import requests\n",
+        "import zipfile\n",
+        "\n",
+        "url = 'https://s3.amazonaws.com/pytorch-tutorial-assets/imagenet_1k.zip'\n",
+        "r = requests.get(url, allow_redirects=True)\n",
+        "\n",
+        "open('imagenet_1k.zip', 'wb').write(r.content)\n",
+        "\n",
+        "with zipfile.ZipFile('/content/imagenet_1k.zip', 'r') as zip_ref:\n",
+        "    zip_ref.extractall('./data')"
+      ],
+      "execution_count": 5,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "GTo-JBjVmT9j",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "def prepare_data_loaders(data_path):\n",
+        "\n",
+        "    traindir = os.path.join(data_path, 'train')\n",
+        "    valdir = os.path.join(data_path, 'val')\n",
+        "    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],\n",
+        "                                     std=[0.229, 0.224, 0.225])\n",
+        "\n",
+        "    dataset = torchvision.datasets.ImageFolder(\n",
+        "        traindir,\n",
+        "        transforms.Compose([\n",
+        "            transforms.RandomResizedCrop(224),\n",
+        "            transforms.RandomHorizontalFlip(),\n",
+        "            transforms.ToTensor(),\n",
+        "            normalize,\n",
+        "        ]))\n",
+        "\n",
+        "    dataset_test = torchvision.datasets.ImageFolder(\n",
+        "        valdir,\n",
+        "        transforms.Compose([\n",
+        "            transforms.Resize(256),\n",
+        "            transforms.CenterCrop(224),\n",
+        "            transforms.ToTensor(),\n",
+        "            normalize,\n",
+        "        ]))\n",
+        "\n",
+        "    train_sampler = torch.utils.data.RandomSampler(dataset)\n",
+        "    test_sampler = torch.utils.data.SequentialSampler(dataset_test)\n",
+        "\n",
+        "    data_loader = torch.utils.data.DataLoader(\n",
+        "        dataset, batch_size=train_batch_size,\n",
+        "        sampler=train_sampler)\n",
+        "\n",
+        "    data_loader_test = torch.utils.data.DataLoader(\n",
+        "        dataset_test, batch_size=eval_batch_size,\n",
+        "        sampler=test_sampler)\n",
+        "\n",
+        "    return data_loader, data_loader_test"
+      ],
+      "execution_count": 6,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "nZ1rbOa6mT9p",
+        "colab_type": "text"
+      },
+      "source": [
+        "Next, we'll load in the pre-trained MobileNetV2 model. 
We provide the URL used by ``torchvision`` to download the pre-trained weights\n",
+        "`here `_.\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "3OmoorWi0zVJ",
+        "colab_type": "code",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 34
+        },
+        "outputId": "76174591-33d8-4824-8741-9016a00b4d6d"
+      },
+      "source": [
+        "url = 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth'\n",
+        "r = requests.get(url, allow_redirects=True)\n",
+        "open('./data/mobilenet_pretrained_float.pth', 'wb').write(r.content)"
+      ],
+      "execution_count": 7,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "14212972"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 7
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "2-SFR69KmT9q",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "data_path = 'data/imagenet_1k'\n",
+        "saved_model_dir = 'data/'\n",
+        "float_model_file = 'mobilenet_pretrained_float.pth'\n",
+        "scripted_float_model_file = 'mobilenet_quantization_scripted.pth'\n",
+        "scripted_quantized_model_file = 'mobilenet_quantization_scripted_quantized.pth'\n",
+        "\n",
+        "train_batch_size = 30\n",
+        "eval_batch_size = 30\n",
+        "\n",
+        "data_loader, data_loader_test = prepare_data_loaders(data_path)\n",
+        "criterion = nn.CrossEntropyLoss()\n",
+        "float_model = load_model(saved_model_dir + float_model_file).to('cpu')"
+      ],
+      "execution_count": 8,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "lR5e2h7WmT90",
+        "colab_type": "text"
+      },
+      "source": [
+        "Next, we'll \"fuse modules\"; this can make the model faster by saving on memory access\n",
+        "while also improving numerical accuracy. While this can be used with any model, this is\n",
+        "especially common with quantized models.\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "jrzTSHWLmT91",
+        "colab_type": "code",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 496
+        },
+        "outputId": "31ddd15e-fc30-4d40-e0a7-9887cd5e5029"
+      },
+      "source": [
+        "print('\\n Inverted Residual Block: Before fusion \\n\\n', float_model.features[1].conv)\n",
+        "float_model.eval()\n",
+        "\n",
+        "# Fuses modules\n",
+        "float_model.fuse_model()\n",
+        "\n",
+        "# Note fusion of Conv+BN+Relu and Conv+Relu\n",
+        "print('\\n Inverted Residual Block: After fusion\\n\\n',float_model.features[1].conv)"
+      ],
+      "execution_count": 9,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "\n",
+            " Inverted Residual Block: Before fusion \n",
+            "\n",
+            " Sequential(\n",
+            "  (0): ConvBNReLU(\n",
+            "    (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)\n",
+            "    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+            "    (2): ReLU()\n",
+            "  )\n",
+            "  (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+            "  (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+            ")\n",
+            "\n",
+            " Inverted Residual Block: After fusion\n",
+            "\n",
+            " Sequential(\n",
+            "  (0): ConvBNReLU(\n",
+            "    (0): ConvReLU2d(\n",
+            "      (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32)\n",
+            "      (1): ReLU()\n",
+            "    )\n",
+            "    (1): Identity()\n",
+            "    (2): Identity()\n",
+            "  )\n",
+            "  (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1))\n",
+            "  (2): Identity()\n",
+            ")\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "GL4sKU5nmT-D",
+        "colab_type": "text"
+      },
+      "source": [
+        "Finally, to get a \"baseline\" accuracy, let's see the accuracy of our un-quantized model\n",
+        "with fused modules.\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "F9pqyGX9mT-G",
+        "colab_type": "code",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 70
+        },
+        "outputId": "4a09ef24-5357-4413-f147-80e662221558"
+      },
+      "source": [
+        "num_eval_batches = 10\n",
+        "\n",
+        "print(\"Size of baseline model\")\n",
+        "print_size_of_model(float_model)\n",
+        "\n",
+        "top1, top5 = evaluate(float_model, criterion, data_loader_test, neval_batches=num_eval_batches)\n",
+        "print('Evaluation accuracy on %d images, %2.2f'%(num_eval_batches * eval_batch_size, top1.avg))\n",
+        "torch.jit.save(torch.jit.script(float_model), saved_model_dir + scripted_float_model_file)"
+      ],
+      "execution_count": 10,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "Size of baseline model\n",
+            "Size (MB): 13.998515\n",
+            "..........Evaluation accuracy on 300 images, 78.00\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "l9IMIpdPmT-L",
+        "colab_type": "text"
+      },
+      "source": [
+        "We see 78% accuracy on 300 images, a solid baseline for ImageNet,\n",
+        "especially considering our model is just 14.0 MB.\n",
+        "\n",
+        "This will be our baseline to compare to. Next, let's try different quantization methods.\n",
+        "\n",
+        "4. Post-training static quantization\n",
+        "------------------------------------\n",
+        "\n",
+        "Post-training static quantization involves not just converting the weights from float to int,\n",
+        "as in dynamic quantization, but also performing the additional step of first feeding batches\n",
+        "of data through the network and computing the resulting distributions of the different activations\n",
+        "(specifically, this is done by inserting `observer` modules at different points that record this\n",
+        "data). These distributions are then used to determine how specifically the different activations\n",
+        "should be quantized at inference time (a simple technique would be to divide the entire range\n",
+        "of activations into 256 levels, but we support more sophisticated methods as well). 
Importantly,\n", + "this additional step allows us to pass quantized values between operations instead of converting these\n", + "values to floats - and then back to ints - between every operation, resulting in a significant speed-up.\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "vr3kkFyLmT-N", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 818 + }, + "outputId": "fc78613d-21f1-4f2b-d3e4-addd2557d88c" + }, + "source": [ + "num_calibration_batches = 10\n", + "\n", + "myModel = load_model(saved_model_dir + float_model_file).to('cpu')\n", + "myModel.eval()\n", + "\n", + "# Fuse Conv, bn and relu\n", + "myModel.fuse_model()\n", + "\n", + "# Specify quantization configuration\n", + "# Start with simple min/max range estimation and per-tensor quantization of weights\n", + "myModel.qconfig = torch.quantization.default_qconfig\n", + "print(myModel.qconfig)\n", + "torch.quantization.prepare(myModel, inplace=True)\n", + "\n", + "# Calibrate first\n", + "print('Post Training Quantization Prepare: Inserting Observers')\n", + "print('\\n Inverted Residual Block:After observer insertion \\n\\n', myModel.features[1].conv)\n", + "\n", + "# Calibrate with the training set\n", + "evaluate(myModel, criterion, data_loader, neval_batches=num_calibration_batches)\n", + "print('Post Training Quantization: Calibration done')\n", + "\n", + "# Convert to quantized model\n", + "torch.quantization.convert(myModel, inplace=True)\n", + "print('Post Training Quantization: Convert done')\n", + "print('\\n Inverted Residual Block: After fusion and quantization, note fused modules: \\n\\n',myModel.features[1].conv)\n", + "\n", + "print(\"Size of model after quantization\")\n", + "print_size_of_model(myModel)\n", + "\n", + "top1, top5 = evaluate(myModel, criterion, data_loader_test, neval_batches=num_eval_batches)\n", + "print('Evaluation accuracy on %d images, %2.2f'%(num_eval_batches * eval_batch_size, top1.avg))" + ], + "execution_count": 11, + "outputs": [ + { + "output_type": "stream", + "text": [ + "QConfig(activation=functools.partial(, reduce_range=True), weight=functools.partial(, dtype=torch.qint8, qscheme=torch.per_tensor_symmetric))\n", + "Post Training Quantization Prepare: Inserting Observers\n", + "\n", + " Inverted Residual Block:After observer insertion \n", + "\n", + " Sequential(\n", + " (0): ConvBNReLU(\n", + " (0): ConvReLU2d(\n", + " (0): Conv2d(\n", + " 32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32\n", + " (activation_post_process): MinMaxObserver(min_val=tensor([]), max_val=tensor([]))\n", + " )\n", + " (1): ReLU(\n", + " (activation_post_process): MinMaxObserver(min_val=tensor([]), max_val=tensor([]))\n", + " )\n", + " )\n", + " (1): Identity()\n", + " (2): Identity()\n", + " )\n", + " (1): Conv2d(\n", + " 32, 16, kernel_size=(1, 1), stride=(1, 1)\n", + " (activation_post_process): MinMaxObserver(min_val=tensor([]), max_val=tensor([]))\n", + " )\n", + " (2): Identity()\n", + ")\n", + "..........Post Training Quantization: Calibration done\n", + "Post Training Quantization: Convert done\n", + "\n", + " Inverted Residual Block: After fusion and quantization, note fused modules: \n", + "\n", + " Sequential(\n", + " (0): ConvBNReLU(\n", + " (0): QuantizedConvReLU2d(32, 32, kernel_size=(3, 3), stride=(1, 1), scale=0.1306864470243454, zero_point=0, padding=(1, 1), groups=32)\n", + " (1): Identity()\n", + " (2): Identity()\n", + " )\n", + " (1): QuantizedConv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), 
scale=0.16245220601558685, zero_point=65)\n",
+            "  (2): Identity()\n",
+            ")\n",
+            "Size of model after quantization\n",
+            "Size (MB): 3.629327\n"
+          ],
+          "name": "stdout"
+        },
+        {
+          "output_type": "stream",
+          "text": [
+            "/usr/local/lib/python3.6/dist-packages/torch/quantization/observer.py:136: UserWarning: must run observer before calling calculate_qparams. Returning default scale and zero point \n",
+            "  Returning default scale and zero point \"\n"
+          ],
+          "name": "stderr"
+        },
+        {
+          "output_type": "stream",
+          "text": [
+            "..........Evaluation accuracy on 300 images, 67.67\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "9UvwzZRamT-a",
+        "colab_type": "text"
+      },
+      "source": [
+        "For this quantized model, we see a significantly lower accuracy of just ~68% on these same 300\n",
+        "images. Nevertheless, we did reduce the size of our model down to just over 3.6 MB, almost a 4x\n",
+        "decrease.\n",
+        "\n",
+        "In addition, we can significantly improve on the accuracy simply by using a different\n",
+        "quantization configuration. We repeat the same exercise with the recommended configuration for\n",
+        "quantizing for x86 architectures. This configuration does the following:\n",
+        "\n",
+        "- Quantizes weights on a per-channel basis\n",
+        "- Uses a histogram observer that collects a histogram of activations and then picks\n",
+        "  quantization parameters in an optimal manner.\n",
+        "\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "0I6WUUgQmT-b",
+        "colab_type": "code",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 107
+        },
+        "outputId": "89fb543e-2fd8-4a23-abd3-7bf4d00d00d6"
+      },
+      "source": [
+        "per_channel_quantized_model = load_model(saved_model_dir + float_model_file)\n",
+        "per_channel_quantized_model.eval()\n",
+        "per_channel_quantized_model.fuse_model()\n",
+        "per_channel_quantized_model.qconfig = torch.quantization.get_default_qconfig('fbgemm')\n",
+        "print(per_channel_quantized_model.qconfig)\n",
+        "\n",
+        "torch.quantization.prepare(per_channel_quantized_model, inplace=True)\n",
+        "evaluate(per_channel_quantized_model,criterion, data_loader, num_calibration_batches)\n",
+        "torch.quantization.convert(per_channel_quantized_model, inplace=True)\n",
+        "top1, top5 = evaluate(per_channel_quantized_model, criterion, data_loader_test, neval_batches=num_eval_batches)\n",
+        "print('Evaluation accuracy on %d images, %2.2f'%(num_eval_batches * eval_batch_size, top1.avg))\n",
+        "torch.jit.save(torch.jit.script(per_channel_quantized_model), saved_model_dir + scripted_quantized_model_file)"
+      ],
+      "execution_count": 12,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "QConfig(activation=functools.partial(, reduce_range=True), weight=functools.partial(, dtype=torch.qint8, qscheme=torch.per_channel_symmetric))\n",
+            ".........."
+          ],
+          "name": "stdout"
+        },
+        {
+          "output_type": "stream",
+          "text": [
+            "/usr/local/lib/python3.6/dist-packages/torch/quantization/observer.py:877: UserWarning: must run observer before calling calculate_qparams. 
Returning default scale and zero point \n", + " Returning default scale and zero point \"\n" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": [ + "..........Evaluation accuracy on 300 images, 76.00\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "z1ussBHjmT-g", + "colab_type": "text" + }, + "source": [ + "Changing just this quantization configuration method resulted in an increase\n", + "of the accuracy to over 76%! Still, this is 1-2% worse than the baseline of 78% achieved above.\n", + "So lets try quantization aware training.\n", + "\n", + "5. Quantization-aware training\n", + "------------------------------\n", + "\n", + "Quantization-aware training (QAT) is the quantization method that typically results in the highest accuracy.\n", + "With QAT, all weights and activations are “fake quantized” during both the forward and backward passes of\n", + "training: that is, float values are rounded to mimic int8 values, but all computations are still done with\n", + "floating point numbers. Thus, all the weight adjustments during training are made while “aware” of the fact\n", + "that the model will ultimately be quantized; after quantizing, therefore, this method will usually yield\n", + "higher accuracy than either dynamic quantization or post-training static quantization.\n", + "\n", + "The overall workflow for actually performing QAT is very similar to before:\n", + "\n", + "- We can use the same model as before: there is no additional preparation needed for quantization-aware\n", + " training.\n", + "- We need to use a ``qconfig`` specifying what kind of fake-quantization is to be inserted after weights\n", + " and activations, instead of specifying observers\n", + "\n", + "We first define a training function:\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "APtX9dtKmT-h", + "colab_type": "code", + "colab": {} + }, + "source": [ + "def train_one_epoch(model, criterion, optimizer, data_loader, device, ntrain_batches):\n", + " model.train()\n", + " top1 = AverageMeter('Acc@1', ':6.2f')\n", + " top5 = AverageMeter('Acc@5', ':6.2f')\n", + " avgloss = AverageMeter('Loss', '1.5f')\n", + "\n", + " cnt = 0\n", + " for image, target in data_loader:\n", + " start_time = time.time()\n", + " print('.', end = '')\n", + " cnt += 1\n", + " image, target = image.to(device), target.to(device)\n", + " output = model(image)\n", + " loss = criterion(output, target)\n", + " optimizer.zero_grad()\n", + " loss.backward()\n", + " optimizer.step()\n", + " acc1, acc5 = accuracy(output, target, topk=(1, 5))\n", + " top1.update(acc1[0], image.size(0))\n", + " top5.update(acc5[0], image.size(0))\n", + " avgloss.update(loss, image.size(0))\n", + " if cnt >= ntrain_batches:\n", + " print('Loss', avgloss.avg)\n", + "\n", + " print('Training: * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'\n", + " .format(top1=top1, top5=top5))\n", + " return\n", + "\n", + " print('Full imagenet train set: * Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f}'\n", + " .format(top1=top1, top5=top5))\n", + " return" + ], + "execution_count": 13, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eygHNI-VmT-o", + "colab_type": "text" + }, + "source": [ + "We fuse modules as before\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "OhT0liNqmT-p", + "colab_type": "code", + "colab": {} + }, + "source": [ + "qat_model = load_model(saved_model_dir + float_model_file)\n", + "qat_model.fuse_model()\n", + "\n", + 
"optimizer = torch.optim.SGD(qat_model.parameters(), lr = 0.0001)\n", + "qat_model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')" + ], + "execution_count": 14, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GP7eBfldmT-v", + "colab_type": "text" + }, + "source": [ + "Finally, ``prepare_qat`` performs the \"fake quantization\", preparing the model for quantization-aware\n", + "training\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "9_XnVNcCmT-w", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 605 + }, + "outputId": "0e5d8790-b77a-4c43-98a8-b09bb720c88e" + }, + "source": [ + "torch.quantization.prepare_qat(qat_model, inplace=True)\n", + "print('Inverted Residual Block: After preparation for QAT, note fake-quantization modules \\n',qat_model.features[1].conv)" + ], + "execution_count": 15, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Inverted Residual Block: After preparation for QAT, note fake-quantization modules \n", + " Sequential(\n", + " (0): ConvBNReLU(\n", + " (0): ConvBnReLU2d(\n", + " 32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False\n", + " (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activation_post_process): FakeQuantize(\n", + " fake_quant_enabled=tensor([1], dtype=torch.uint8), observer_enabled=tensor([1], dtype=torch.uint8), scale=tensor([1.]), zero_point=tensor([0])\n", + " (activation_post_process): MovingAverageMinMaxObserver(min_val=tensor([]), max_val=tensor([]))\n", + " )\n", + " (weight_fake_quant): FakeQuantize(\n", + " fake_quant_enabled=tensor([1], dtype=torch.uint8), observer_enabled=tensor([1], dtype=torch.uint8), scale=tensor([1.]), zero_point=tensor([0])\n", + " (activation_post_process): MovingAveragePerChannelMinMaxObserver(min_val=tensor([]), max_val=tensor([]))\n", + " )\n", + " )\n", + " (1): Identity()\n", + " (2): Identity()\n", + " )\n", + " (1): ConvBn2d(\n", + " 32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False\n", + " (bn): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (activation_post_process): FakeQuantize(\n", + " fake_quant_enabled=tensor([1], dtype=torch.uint8), observer_enabled=tensor([1], dtype=torch.uint8), scale=tensor([1.]), zero_point=tensor([0])\n", + " (activation_post_process): MovingAverageMinMaxObserver(min_val=tensor([]), max_val=tensor([]))\n", + " )\n", + " (weight_fake_quant): FakeQuantize(\n", + " fake_quant_enabled=tensor([1], dtype=torch.uint8), observer_enabled=tensor([1], dtype=torch.uint8), scale=tensor([1.]), zero_point=tensor([0])\n", + " (activation_post_process): MovingAveragePerChannelMinMaxObserver(min_val=tensor([]), max_val=tensor([]))\n", + " )\n", + " )\n", + " (2): Identity()\n", + ")\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lZRffh5YmT-5", + "colab_type": "text" + }, + "source": [ + "Training a quantized model with high accuracy requires accurate modeling of numerics at\n", + "inference. 
For quantization aware training, therefore, we modify the training loop by:\n", + "\n", + "- Switch batch norm to use running mean and variance towards the end of training to better\n", + " match inference numerics.\n", + "- We also freeze the quantizer parameters (scale and zero-point) and fine tune the weights.\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "8LMhmh2GmT-6", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 356 + }, + "outputId": "b891db1d-9f05-4a63-ce73-ec20a715f0a4" + }, + "source": [ + "num_train_batches = 20\n", + "\n", + "# Train and check accuracy after each epoch\n", + "for nepoch in range(8):\n", + " train_one_epoch(qat_model, criterion, optimizer, data_loader, torch.device('cpu'), num_train_batches)\n", + " if nepoch > 3:\n", + " # Freeze quantizer parameters\n", + " qat_model.apply(torch.quantization.disable_observer)\n", + " if nepoch > 2:\n", + " # Freeze batch norm mean and variance estimates\n", + " qat_model.apply(torch.nn.intrinsic.qat.freeze_bn_stats)\n", + "\n", + " # Check the accuracy after each epoch\n", + " quantized_model = torch.quantization.convert(qat_model.eval(), inplace=False)\n", + " quantized_model.eval()\n", + " top1, top5 = evaluate(quantized_model,criterion, data_loader_test, neval_batches=num_eval_batches)\n", + " print('Epoch %d :Evaluation accuracy on %d images, %2.2f'%(nepoch, num_eval_batches * eval_batch_size, top1.avg))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "....................Loss tensor(2.0058, grad_fn=)\n", + "Training: * Acc@1 54.500 Acc@5 76.833\n" + ], + "name": "stdout" + }, + { + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.6/dist-packages/torch/quantization/observer.py:136: UserWarning: must run observer before calling calculate_qparams. Returning default scale and zero point \n", + " Returning default scale and zero point \"\n" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": [ + "..........Epoch 0 :Evaluation accuracy on 300 images, 75.00\n", + "....................Loss tensor(2.1846, grad_fn=)\n", + "Training: * Acc@1 52.833 Acc@5 74.833\n", + "..........Epoch 1 :Evaluation accuracy on 300 images, 73.67\n", + "....................Loss tensor(1.9846, grad_fn=)\n", + "Training: * Acc@1 54.667 Acc@5 79.167\n", + "..........Epoch 2 :Evaluation accuracy on 300 images, 75.67\n", + "....................Loss tensor(2.0301, grad_fn=)\n", + "Training: * Acc@1 52.500 Acc@5 76.667\n", + "..........Epoch 3 :Evaluation accuracy on 300 images, 75.67\n", + "....................Loss tensor(1.8863, grad_fn=)\n", + "Training: * Acc@1 59.000 Acc@5 80.500\n", + "..........Epoch 4 :Evaluation accuracy on 300 images, 76.00\n", + "............" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uA-s2UhHmT--", + "colab_type": "text" + }, + "source": [ + "Here, we just perform quantization-aware training for a small number of epochs. 
Nevertheless,\n", + "quantization-aware training yields an accuracy of over 71% on the entire imagenet dataset,\n", + "which is close to the floating point accuracy of 71.9%.\n", + "\n", + "More on quantization-aware training:\n", + "\n", + "- QAT is a super-set of post training quant techniques that allows for more debugging.\n", + " For example, we can analyze if the accuracy of the model is limited by weight or activation\n", + " quantization.\n", + "- We can also simulate the accuracy of a quantized model in floating point since\n", + " we are using fake-quantization to model the numerics of actual quantized arithmetic.\n", + "- We can mimic post training quantization easily too.\n", + "\n", + "Speedup from quantization\n", + "^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "\n", + "Finally, let's confirm something we alluded to above: do our quantized models actually perform inference\n", + "faster? Let's test:\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "uEkFeubKmT-_", + "colab_type": "code", + "colab": {} + }, + "source": [ + "def run_benchmark(model_file, img_loader):\n", + " elapsed = 0\n", + " model = torch.jit.load(model_file)\n", + " model.eval()\n", + " num_batches = 5\n", + " # Run the scripted model on a few batches of images\n", + " for i, (images, target) in enumerate(img_loader):\n", + " if i < num_batches:\n", + " start = time.time()\n", + " output = model(images)\n", + " end = time.time()\n", + " elapsed = elapsed + (end-start)\n", + " else:\n", + " break\n", + " num_images = images.size()[0] * num_batches\n", + "\n", + " print('Elapsed time: %3.0f ms' % (elapsed/num_images*1000))\n", + " return elapsed\n", + "\n", + "run_benchmark(saved_model_dir + scripted_float_model_file, data_loader_test)\n", + "\n", + "run_benchmark(saved_model_dir + scripted_quantized_model_file, data_loader_test)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LMFrLrSwmT_I", + "colab_type": "text" + }, + "source": [ + "Running this locally on a MacBook pro yielded 61 ms for the regular model, and\n", + "just 20 ms for the quantized model, illustrating the typical 2-4x speedup\n", + "we see for quantized models compared to floating point ones.\n", + "\n", + "Conclusion\n", + "----------\n", + "\n", + "In this tutorial, we showed two quantization methods - post-training static quantization,\n", + "and quantization-aware training - describing what they do \"under the hood\" and how to use\n", + "them in PyTorch.\n", + "\n", + "Thanks for reading! As always, we welcome any feedback, so please create an issue\n", + "`here `_ if you have any.\n", + "\n" + ] + } + ] +} \ No newline at end of file