Adding quantizable MobileNetV3 architecture.

datumbox · datumbox · commit 1ab143e3e128 · 2021-01-29T11:21:23.000Z
diff --git a/torchvision/models/quantization/mobilenet.py b/torchvision/models/quantization/mobilenet.py
@@ -1,3 +1,4 @@
 from .mobilenetv2 import QuantizableMobileNetV2, mobilenet_v2, __all__ as mv2_all
+from .mobilenetv3 import QuantizableMobileNetV3, mobilenet_v3_large, mobilenet_v3_small, __all__ as mv3_all
 
-__all__ = mv2_all
+__all__ = mv2_all + mv3_all
diff --git a/torchvision/models/quantization/mobilenetv3.py b/torchvision/models/quantization/mobilenetv3.py
@@ -0,0 +1,141 @@
+from torch import nn, Tensor
+from torchvision.models.utils import load_state_dict_from_url
+from torchvision.models.mobilenetv3 import InvertedResidual, InvertedResidualConfig, ConvBNActivation, MobileNetV3,\
+    SqueezeExcitation, model_urls, _mobilenet_v3_conf
+from torch.quantization import QuantStub, DeQuantStub, fuse_modules
+from typing import Any, List
+from .utils import _replace_relu, quantize_model
+
+
+__all__ = ['QuantizableMobileNetV3', 'mobilenet_v3_large', 'mobilenet_v3_small']
+
+# TODO: Add URLs. While these are None, quantize=True with pretrained=True raises ValueError.
+quant_model_urls = {  # keyed by "<arch>_<backend>"
+    'mobilenet_v3_large_qnnpack': None,
+    'mobilenet_v3_small_qnnpack': None,
+}
+
+
+class QuantizableSqueezeExcitation(SqueezeExcitation):  # SqueezeExcitation whose multiply uses FloatFunctional
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.skip_mul = nn.quantized.FloatFunctional()  # module wrapper used for the multiply in forward()
+
+    def forward(self, input: Tensor) -> Tensor:
+        return self.skip_mul.mul(self._scale(input, False), input)  # False is presumably _scale's inplace flag - confirm
+
+    def fuse_model(self):
+        fuse_modules(self, ['fc1', 'relu'], inplace=True)  # fuses the 'fc1' and 'relu' submodules in place
+
+
+class QuantizableInvertedResidual(InvertedResidual):  # InvertedResidual wired for quantization
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, se_layer=QuantizableSqueezeExcitation, **kwargs)  # always passes the quantizable SE
+        self.skip_add = nn.quantized.FloatFunctional()  # module wrapper used for the residual add in forward()
+
+    def forward(self, x):
+        if self.use_res_connect:
+            return self.skip_add.add(x, self.block(x))  # residual path: x + block(x) via FloatFunctional
+        else:
+            return self.block(x)
+
+    def fuse_model(self):  # NOTE(review): likely a no-op since se_layer is the quantizable subclass - confirm
+        for idx in range(len(self.block)):
+            if type(self.block[idx]) == SqueezeExcitation:  # exact base type; subclass instances do not match
+                fuse_modules(self.block[idx], ['fc1', 'relu'], inplace=True)
+
+
+class QuantizableMobileNetV3(MobileNetV3):
+    def __init__(self, *args, **kwargs):
+        """
+        Quantizable MobileNet V3: the float model plus QuantStub/DeQuantStub around its forward pass
+
+        Args:
+           Inherits args from floating point MobileNetV3
+        """
+        super().__init__(*args, **kwargs)
+        self.quant = QuantStub()
+        self.dequant = DeQuantStub()
+
+    def forward(self, x):
+        x = self.quant(x)  # input goes through the QuantStub first
+        x = self._forward_impl(x)  # not defined in this class; comes from the MobileNetV3 base
+        x = self.dequant(x)  # output goes through the DeQuantStub last
+        return x
+
+    def fuse_model(self):
+        for m in self.modules():
+            if type(m) == ConvBNActivation:  # exact type match only
+                modules_to_fuse = ['0', '1']  # submodules '0' and '1' are always fused
+                if type(m[2]) == nn.ReLU:
+                    modules_to_fuse.append('2')  # third submodule joins the fusion only when it is exactly nn.ReLU
+                fuse_modules(m, modules_to_fuse, inplace=True)
+            elif type(m) in {QuantizableInvertedResidual, QuantizableSqueezeExcitation}:
+                m.fuse_model()  # delegate to the module's own fuse_model
+
+
+def _mobilenet_v3_model(
+    arch: str,  # key into model_urls; with '_' + backend appended, key into quant_model_urls
+    inverted_residual_setting: List[InvertedResidualConfig],
+    last_channel: int,
+    pretrained: bool,  # if True, a checkpoint is downloaded and loaded into the model
+    progress: bool,  # forwarded to load_state_dict_from_url
+    quantize: bool,  # if True, quantize_model is applied with the 'qnnpack' backend
+    **kwargs: Any  # forwarded to the QuantizableMobileNetV3 constructor
+) -> QuantizableMobileNetV3:
+    model = QuantizableMobileNetV3(inverted_residual_setting, last_channel, block=QuantizableInvertedResidual, **kwargs)
+    _replace_relu(model)  # helper from .utils; effect not visible here - presumably swaps ReLU modules, TODO confirm
+
+    if quantize:
+        backend = 'qnnpack'  # only backend used here; also the suffix of the checkpoint key
+        quantize_model(model, backend)
+        model_url = quant_model_urls.get(arch + '_' + backend, None)
+    else:
+        assert pretrained in [True, False]  # NOTE(review): stripped under -O; only checked on the float path
+        model_url = model_urls.get(arch, None)
+
+    if pretrained:
+        if model_url is None:  # unknown arch, or a checkpoint entry that is still None
+            raise ValueError("No checkpoint is available for {}".format(arch))
+        state_dict = load_state_dict_from_url(model_url, progress=progress)
+        model.load_state_dict(state_dict)  # on the quantized path this runs after quantize_model
+
+    return model
+
+
+def mobilenet_v3_large(pretrained: bool = False, progress: bool = True, quantize: bool = False, **kwargs: Any):
+    """
+    Constructs a MobileNetV3 Large architecture from
+    `"Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>`_.
+
+    Note that quantize = True returns a quantized model with 8 bit weights. Quantized models
+    only support inference and run on CPUs. GPU inference is not yet supported.
+
+    Args:
+     pretrained (bool): If True, returns a model pre-trained on ImageNet (ValueError if no checkpoint URL exists)
+     progress (bool): If True, displays a progress bar of the download to stderr
+     quantize (bool): If True, returns a quantized model, else returns a float model
+     kwargs: forwarded to _mobilenet_v3_conf and then to the QuantizableMobileNetV3 constructor
+    """
+    arch = "mobilenet_v3_large"
+    inverted_residual_setting, last_channel = _mobilenet_v3_conf(arch, kwargs)  # kwargs passed as a dict, not unpacked
+    return _mobilenet_v3_model(arch, inverted_residual_setting, last_channel, pretrained, progress, quantize, **kwargs)
+
+
+def mobilenet_v3_small(pretrained: bool = False, progress: bool = True, quantize: bool = False, **kwargs: Any):
+    """
+    Constructs a MobileNetV3 Small architecture from
+    `"Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>`_.
+
+    Note that quantize = True returns a quantized model with 8 bit weights. Quantized models
+    only support inference and run on CPUs. GPU inference is not yet supported.
+
+    Args:
+     pretrained (bool): If True, returns a model pre-trained on ImageNet (ValueError if no checkpoint URL exists)
+     progress (bool): If True, displays a progress bar of the download to stderr
+     quantize (bool): If True, returns a quantized model, else returns a float model
+     kwargs: forwarded to _mobilenet_v3_conf and then to the QuantizableMobileNetV3 constructor
+    """
+    arch = "mobilenet_v3_small"
+    inverted_residual_setting, last_channel = _mobilenet_v3_conf(arch, kwargs)  # kwargs passed as a dict, not unpacked
+    return _mobilenet_v3_model(arch, inverted_residual_setting, last_channel, pretrained, progress, quantize, **kwargs)