diff --git a/torchvision/models/mobilenet.py b/torchvision/models/mobilenet.py
index d90b3f8ef14..8be33d71a4e 100644
--- a/torchvision/models/mobilenet.py
+++ b/torchvision/models/mobilenet.py
@@ -1,207 +1 @@
-from torch import nn
-from torch import Tensor
-from .utils import load_state_dict_from_url
-from typing import Callable, Any, Optional, List
-
-
-__all__ = ['MobileNetV2', 'mobilenet_v2']
-
-
-model_urls = {
-    'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth',
-}
-
-
-def _make_divisible(v: float, divisor: int, min_value: Optional[int] = None) -> int:
-    """
-    This function is taken from the original tf repo.
-    It ensures that all layers have a channel number that is divisible by 8
-    It can be seen here:
-    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
-    :param v:
-    :param divisor:
-    :param min_value:
-    :return:
-    """
-    if min_value is None:
-        min_value = divisor
-    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
-    # Make sure that round down does not go down by more than 10%.
-    if new_v < 0.9 * v:
-        new_v += divisor
-    return new_v
-
-
-class ConvBNReLU(nn.Sequential):
-    def __init__(
-        self,
-        in_planes: int,
-        out_planes: int,
-        kernel_size: int = 3,
-        stride: int = 1,
-        groups: int = 1,
-        norm_layer: Optional[Callable[..., nn.Module]] = None
-    ) -> None:
-        padding = (kernel_size - 1) // 2
-        if norm_layer is None:
-            norm_layer = nn.BatchNorm2d
-        super(ConvBNReLU, self).__init__(
-            nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
-            norm_layer(out_planes),
-            nn.ReLU6(inplace=True)
-        )
-
-
-class InvertedResidual(nn.Module):
-    def __init__(
-        self,
-        inp: int,
-        oup: int,
-        stride: int,
-        expand_ratio: int,
-        norm_layer: Optional[Callable[..., nn.Module]] = None
-    ) -> None:
-        super(InvertedResidual, self).__init__()
-        self.stride = stride
-        assert stride in [1, 2]
-
-        if norm_layer is None:
-            norm_layer = nn.BatchNorm2d
-
-        hidden_dim = int(round(inp * expand_ratio))
-        self.use_res_connect = self.stride == 1 and inp == oup
-
-        layers: List[nn.Module] = []
-        if expand_ratio != 1:
-            # pw
-            layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer))
-        layers.extend([
-            # dw
-            ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim, norm_layer=norm_layer),
-            # pw-linear
-            nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
-            norm_layer(oup),
-        ])
-        self.conv = nn.Sequential(*layers)
-
-    def forward(self, x: Tensor) -> Tensor:
-        if self.use_res_connect:
-            return x + self.conv(x)
-        else:
-            return self.conv(x)
-
-
-class MobileNetV2(nn.Module):
-    def __init__(
-        self,
-        num_classes: int = 1000,
-        width_mult: float = 1.0,
-        inverted_residual_setting: Optional[List[List[int]]] = None,
-        round_nearest: int = 8,
-        block: Optional[Callable[..., nn.Module]] = None,
-        norm_layer: Optional[Callable[..., nn.Module]] = None
-    ) -> None:
-        """
-        MobileNet V2 main class
-
-        Args:
-            num_classes (int): Number of classes
-            width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
-            inverted_residual_setting: Network structure
-            round_nearest (int): Round the number of channels in each layer to be a multiple of this number
-            Set to 1 to turn off rounding
-            block: Module specifying inverted residual building block for mobilenet
-            norm_layer: Module specifying the normalization layer to use
-
-        """
-        super(MobileNetV2, self).__init__()
-
-        if block is None:
-            block = InvertedResidual
-
-        if norm_layer is None:
-            norm_layer = nn.BatchNorm2d
-
-        input_channel = 32
-        last_channel = 1280
-
-        if inverted_residual_setting is None:
-            inverted_residual_setting = [
-                # t, c, n, s
-                [1, 16, 1, 1],
-                [6, 24, 2, 2],
-                [6, 32, 3, 2],
-                [6, 64, 4, 2],
-                [6, 96, 3, 1],
-                [6, 160, 3, 2],
-                [6, 320, 1, 1],
-            ]
-
-        # only check the first element, assuming user knows t,c,n,s are required
-        if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
-            raise ValueError("inverted_residual_setting should be non-empty "
-                             "or a 4-element list, got {}".format(inverted_residual_setting))
-
-        # building first layer
-        input_channel = _make_divisible(input_channel * width_mult, round_nearest)
-        self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
-        features: List[nn.Module] = [ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer)]
-        # building inverted residual blocks
-        for t, c, n, s in inverted_residual_setting:
-            output_channel = _make_divisible(c * width_mult, round_nearest)
-            for i in range(n):
-                stride = s if i == 0 else 1
-                features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm_layer=norm_layer))
-                input_channel = output_channel
-        # building last several layers
-        features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1, norm_layer=norm_layer))
-        # make it nn.Sequential
-        self.features = nn.Sequential(*features)
-
-        # building classifier
-        self.classifier = nn.Sequential(
-            nn.Dropout(0.2),
-            nn.Linear(self.last_channel, num_classes),
-        )
-
-        # weight initialization
-        for m in self.modules():
-            if isinstance(m, nn.Conv2d):
-                nn.init.kaiming_normal_(m.weight, mode='fan_out')
-                if m.bias is not None:
-                    nn.init.zeros_(m.bias)
-            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
-                nn.init.ones_(m.weight)
-                nn.init.zeros_(m.bias)
-            elif isinstance(m, nn.Linear):
-                nn.init.normal_(m.weight, 0, 0.01)
-                nn.init.zeros_(m.bias)
-
-    def _forward_impl(self, x: Tensor) -> Tensor:
-        # This exists since TorchScript doesn't support inheritance, so the superclass method
-        # (this one) needs to have a name other than `forward` that can be accessed in a subclass
-        x = self.features(x)
-        # Cannot use "squeeze" as batch-size can be 1 => must use reshape with x.shape[0]
-        x = nn.functional.adaptive_avg_pool2d(x, (1, 1)).reshape(x.shape[0], -1)
-        x = self.classifier(x)
-        return x
-
-    def forward(self, x: Tensor) -> Tensor:
-        return self._forward_impl(x)
-
-
-def mobilenet_v2(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> MobileNetV2:
-    """
-    Constructs a MobileNetV2 architecture from
-    `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" <https://arxiv.org/abs/1801.04381>`_.
-
-    Args:
-        pretrained (bool): If True, returns a model pre-trained on ImageNet
-        progress (bool): If True, displays a progress bar of the download to stderr
-    """
-    model = MobileNetV2(**kwargs)
-    if pretrained:
-        state_dict = load_state_dict_from_url(model_urls['mobilenet_v2'],
-                                              progress=progress)
-        model.load_state_dict(state_dict)
-    return model
+from .mobilenetv2 import MobileNetV2, mobilenet_v2
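The hunk above turns torchvision/models/mobilenet.py into a one-line re-export, so import paths that predate this patch keep resolving to the implementation that moves into mobilenetv2.py below. A minimal, illustrative sketch (not part of the patch) of what that is expected to mean for downstream code once the change is applied:

# Both import paths should point at the same class object after the move,
# because mobilenet.py now only re-exports the symbols from mobilenetv2.py.
from torchvision.models.mobilenet import MobileNetV2 as legacy_cls
from torchvision.models.mobilenetv2 import MobileNetV2 as new_cls

assert legacy_cls is new_cls
assert legacy_cls.__module__ == "torchvision.models.mobilenetv2"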
diff --git a/torchvision/models/mobilenetv2.py b/torchvision/models/mobilenetv2.py
new file mode 100644
index 00000000000..990429bacf9
--- /dev/null
+++ b/torchvision/models/mobilenetv2.py
@@ -0,0 +1,214 @@
+from torch import nn
+from torch import Tensor
+from .utils import load_state_dict_from_url
+from typing import Callable, Any, Optional, List
+
+
+__all__ = ['MobileNetV2', 'mobilenet_v2']
+
+
+model_urls = {
+    'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth',
+}
+
+
+def _make_divisible(v: float, divisor: int, min_value: Optional[int] = None) -> int:
+    """
+    This function is taken from the original tf repo.
+    It ensures that all layers have a channel number that is divisible by 8
+    It can be seen here:
+    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
+    :param v:
+    :param divisor:
+    :param min_value:
+    :return:
+    """
+    if min_value is None:
+        min_value = divisor
+    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
+    # Make sure that round down does not go down by more than 10%.
+    if new_v < 0.9 * v:
+        new_v += divisor
+    return new_v
+
+
+class ConvBNActivation(nn.Sequential):
+    def __init__(
+        self,
+        in_planes: int,
+        out_planes: int,
+        kernel_size: int = 3,
+        stride: int = 1,
+        groups: int = 1,
+        norm_layer: Optional[Callable[..., nn.Module]] = None,
+        activation_layer: Optional[Callable[..., nn.Module]] = None,
+    ) -> None:
+        padding = (kernel_size - 1) // 2
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+        if activation_layer is None:
+            activation_layer = nn.ReLU6
+        super(ConvBNActivation, self).__init__(
+            nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
+            norm_layer(out_planes),
+            activation_layer(inplace=True)
+        )
+
+
+# necessary for backwards compatibility
+ConvBNReLU = ConvBNActivation
+
+
+class InvertedResidual(nn.Module):
+    def __init__(
+        self,
+        inp: int,
+        oup: int,
+        stride: int,
+        expand_ratio: int,
+        norm_layer: Optional[Callable[..., nn.Module]] = None
+    ) -> None:
+        super(InvertedResidual, self).__init__()
+        self.stride = stride
+        assert stride in [1, 2]
+
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+
+        hidden_dim = int(round(inp * expand_ratio))
+        self.use_res_connect = self.stride == 1 and inp == oup
+
+        layers: List[nn.Module] = []
+        if expand_ratio != 1:
+            # pw
+            layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer))
+        layers.extend([
+            # dw
+            ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim, norm_layer=norm_layer),
+            # pw-linear
+            nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
+            norm_layer(oup),
+        ])
+        self.conv = nn.Sequential(*layers)
+
+    def forward(self, x: Tensor) -> Tensor:
+        if self.use_res_connect:
+            return x + self.conv(x)
+        else:
+            return self.conv(x)
+
+
+class MobileNetV2(nn.Module):
+    def __init__(
+        self,
+        num_classes: int = 1000,
+        width_mult: float = 1.0,
+        inverted_residual_setting: Optional[List[List[int]]] = None,
+        round_nearest: int = 8,
+        block: Optional[Callable[..., nn.Module]] = None,
+        norm_layer: Optional[Callable[..., nn.Module]] = None
+    ) -> None:
+        """
+        MobileNet V2 main class
+
+        Args:
+            num_classes (int): Number of classes
+            width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
+            inverted_residual_setting: Network structure
+            round_nearest (int): Round the number of channels in each layer to be a multiple of this number
+            Set to 1 to turn off rounding
+            block: Module specifying inverted residual building block for mobilenet
+            norm_layer: Module specifying the normalization layer to use
+
+        """
+        super(MobileNetV2, self).__init__()
+
+        if block is None:
+            block = InvertedResidual
+
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+
+        input_channel = 32
+        last_channel = 1280
+
+        if inverted_residual_setting is None:
+            inverted_residual_setting = [
+                # t, c, n, s
+                [1, 16, 1, 1],
+                [6, 24, 2, 2],
+                [6, 32, 3, 2],
+                [6, 64, 4, 2],
+                [6, 96, 3, 1],
+                [6, 160, 3, 2],
+                [6, 320, 1, 1],
+            ]
+
+        # only check the first element, assuming user knows t,c,n,s are required
+        if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
+            raise ValueError("inverted_residual_setting should be non-empty "
+                             "or a 4-element list, got {}".format(inverted_residual_setting))
+
+        # building first layer
+        input_channel = _make_divisible(input_channel * width_mult, round_nearest)
+        self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
+        features: List[nn.Module] = [ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer)]
+        # building inverted residual blocks
+        for t, c, n, s in inverted_residual_setting:
+            output_channel = _make_divisible(c * width_mult, round_nearest)
+            for i in range(n):
+                stride = s if i == 0 else 1
+                features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm_layer=norm_layer))
+                input_channel = output_channel
+        # building last several layers
+        features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1, norm_layer=norm_layer))
+        # make it nn.Sequential
+        self.features = nn.Sequential(*features)
+
+        # building classifier
+        self.classifier = nn.Sequential(
+            nn.Dropout(0.2),
+            nn.Linear(self.last_channel, num_classes),
+        )
+
+        # weight initialization
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode='fan_out')
+                if m.bias is not None:
+                    nn.init.zeros_(m.bias)
+            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
+                nn.init.ones_(m.weight)
+                nn.init.zeros_(m.bias)
+            elif isinstance(m, nn.Linear):
+                nn.init.normal_(m.weight, 0, 0.01)
+                nn.init.zeros_(m.bias)
+
+    def _forward_impl(self, x: Tensor) -> Tensor:
+        # This exists since TorchScript doesn't support inheritance, so the superclass method
+        # (this one) needs to have a name other than `forward` that can be accessed in a subclass
+        x = self.features(x)
+        # Cannot use "squeeze" as batch-size can be 1 => must use reshape with x.shape[0]
+        x = nn.functional.adaptive_avg_pool2d(x, (1, 1)).reshape(x.shape[0], -1)
+        x = self.classifier(x)
+        return x
+
+    def forward(self, x: Tensor) -> Tensor:
+        return self._forward_impl(x)
+
+
+def mobilenet_v2(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> MobileNetV2:
+    """
+    Constructs a MobileNetV2 architecture from
+    `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" <https://arxiv.org/abs/1801.04381>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    model = MobileNetV2(**kwargs)
+    if pretrained:
+        state_dict = load_state_dict_from_url(model_urls['mobilenet_v2'],
+                                              progress=progress)
+        model.load_state_dict(state_dict)
+    return model
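The new file keeps the same public names (MobileNetV2, mobilenet_v2, model_urls) and only renames ConvBNReLU to ConvBNActivation behind an alias, so existing construction code should behave as before. A hedged usage sketch (not part of the patch) of the constructor and of how width_mult interacts with _make_divisible; width_mult=0.75 is only an example value and has no pretrained checkpoint:

import torch
from torchvision.models.mobilenetv2 import mobilenet_v2, _make_divisible

# Plain float model; pretrained=False because no checkpoint exists for this width.
model = mobilenet_v2(pretrained=False, width_mult=0.75)
out = model(torch.randn(1, 3, 224, 224))
print(out.shape)  # torch.Size([1, 1000])

# width_mult scales every channel count, then _make_divisible snaps it to a multiple of 8:
print(_make_divisible(32 * 0.75, 8))   # 24  -> channels of the stem convolution
print(_make_divisible(320 * 0.75, 8))  # 240 -> channels of the last inverted residual stage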
diff --git a/torchvision/models/quantization/mobilenet.py b/torchvision/models/quantization/mobilenet.py
index 1d14410f376..8be33d71a4e 100644
--- a/torchvision/models/quantization/mobilenet.py
+++ b/torchvision/models/quantization/mobilenet.py
@@ -1,94 +1 @@
-from torch import nn
-from torchvision.models.utils import load_state_dict_from_url
-from torchvision.models.mobilenet import InvertedResidual, ConvBNReLU, MobileNetV2, model_urls
-from torch.quantization import QuantStub, DeQuantStub, fuse_modules
-from .utils import _replace_relu, quantize_model
-
-
-__all__ = ['QuantizableMobileNetV2', 'mobilenet_v2']
-
-quant_model_urls = {
-    'mobilenet_v2_qnnpack':
-        'https://download.pytorch.org/models/quantized/mobilenet_v2_qnnpack_37f702c5.pth'
-}
-
-
-class QuantizableInvertedResidual(InvertedResidual):
-    def __init__(self, *args, **kwargs):
-        super(QuantizableInvertedResidual, self).__init__(*args, **kwargs)
-        self.skip_add = nn.quantized.FloatFunctional()
-
-    def forward(self, x):
-        if self.use_res_connect:
-            return self.skip_add.add(x, self.conv(x))
-        else:
-            return self.conv(x)
-
-    def fuse_model(self):
-        for idx in range(len(self.conv)):
-            if type(self.conv[idx]) == nn.Conv2d:
-                fuse_modules(self.conv, [str(idx), str(idx + 1)], inplace=True)
-
-
-class QuantizableMobileNetV2(MobileNetV2):
-    def __init__(self, *args, **kwargs):
-        """
-        MobileNet V2 main class
-
-        Args:
-           Inherits args from floating point MobileNetV2
-        """
-        super(QuantizableMobileNetV2, self).__init__(*args, **kwargs)
-        self.quant = QuantStub()
-        self.dequant = DeQuantStub()
-
-    def forward(self, x):
-        x = self.quant(x)
-        x = self._forward_impl(x)
-        x = self.dequant(x)
-        return x
-
-    def fuse_model(self):
-        for m in self.modules():
-            if type(m) == ConvBNReLU:
-                fuse_modules(m, ['0', '1', '2'], inplace=True)
-            if type(m) == QuantizableInvertedResidual:
-                m.fuse_model()
-
-
-def mobilenet_v2(pretrained=False, progress=True, quantize=False, **kwargs):
-    """
-    Constructs a MobileNetV2 architecture from
-    `"MobileNetV2: Inverted Residuals and Linear Bottlenecks"
-    <https://arxiv.org/abs/1801.04381>`_.
-
-    Note that quantize = True returns a quantized model with 8 bit
-    weights. Quantized models only support inference and run on CPUs.
-    GPU inference is not yet supported
-
-    Args:
-        pretrained (bool): If True, returns a model pre-trained on ImageNet.
-        progress (bool): If True, displays a progress bar of the download to stderr
-        quantize(bool): If True, returns a quantized model, else returns a float model
-    """
-    model = QuantizableMobileNetV2(block=QuantizableInvertedResidual, **kwargs)
-    _replace_relu(model)
-
-    if quantize:
-        # TODO use pretrained as a string to specify the backend
-        backend = 'qnnpack'
-        quantize_model(model, backend)
-    else:
-        assert pretrained in [True, False]
-
-    if pretrained:
-        if quantize:
-            model_url = quant_model_urls['mobilenet_v2_' + backend]
-        else:
-            model_url = model_urls['mobilenet_v2']
-
-        state_dict = load_state_dict_from_url(model_url,
-                                              progress=progress)
-
-        model.load_state_dict(state_dict)
-    return model
+from .mobilenetv2 import QuantizableMobileNetV2, mobilenet_v2
diff --git a/torchvision/models/quantization/mobilenetv2.py b/torchvision/models/quantization/mobilenetv2.py
new file mode 100644
index 00000000000..72c522a2e46
--- /dev/null
+++ b/torchvision/models/quantization/mobilenetv2.py
@@ -0,0 +1,94 @@
+from torch import nn
+from torchvision.models.utils import load_state_dict_from_url
+from torchvision.models.mobilenetv2 import InvertedResidual, ConvBNReLU, MobileNetV2, model_urls
+from torch.quantization import QuantStub, DeQuantStub, fuse_modules
+from .utils import _replace_relu, quantize_model
+
+
+__all__ = ['QuantizableMobileNetV2', 'mobilenet_v2']
+
+quant_model_urls = {
+    'mobilenet_v2_qnnpack':
+        'https://download.pytorch.org/models/quantized/mobilenet_v2_qnnpack_37f702c5.pth'
+}
+
+
+class QuantizableInvertedResidual(InvertedResidual):
+    def __init__(self, *args, **kwargs):
+        super(QuantizableInvertedResidual, self).__init__(*args, **kwargs)
+        self.skip_add = nn.quantized.FloatFunctional()
+
+    def forward(self, x):
+        if self.use_res_connect:
+            return self.skip_add.add(x, self.conv(x))
+        else:
+            return self.conv(x)
+
+    def fuse_model(self):
+        for idx in range(len(self.conv)):
+            if type(self.conv[idx]) == nn.Conv2d:
+                fuse_modules(self.conv, [str(idx), str(idx + 1)], inplace=True)
+
+
+class QuantizableMobileNetV2(MobileNetV2):
+    def __init__(self, *args, **kwargs):
+        """
+        MobileNet V2 main class
+
+        Args:
+           Inherits args from floating point MobileNetV2
+        """
+        super(QuantizableMobileNetV2, self).__init__(*args, **kwargs)
+        self.quant = QuantStub()
+        self.dequant = DeQuantStub()
+
+    def forward(self, x):
+        x = self.quant(x)
+        x = self._forward_impl(x)
+        x = self.dequant(x)
+        return x
+
+    def fuse_model(self):
+        for m in self.modules():
+            if type(m) == ConvBNReLU:
+                fuse_modules(m, ['0', '1', '2'], inplace=True)
+            if type(m) == QuantizableInvertedResidual:
+                m.fuse_model()
+
+
+def mobilenet_v2(pretrained=False, progress=True, quantize=False, **kwargs):
+    """
+    Constructs a MobileNetV2 architecture from
+    `"MobileNetV2: Inverted Residuals and Linear Bottlenecks"
+    <https://arxiv.org/abs/1801.04381>`_.
+
+    Note that quantize = True returns a quantized model with 8 bit
+    weights. Quantized models only support inference and run on CPUs.
+    GPU inference is not yet supported
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet.
+        progress (bool): If True, displays a progress bar of the download to stderr
+        quantize(bool): If True, returns a quantized model, else returns a float model
+    """
+    model = QuantizableMobileNetV2(block=QuantizableInvertedResidual, **kwargs)
+    _replace_relu(model)
+
+    if quantize:
+        # TODO use pretrained as a string to specify the backend
+        backend = 'qnnpack'
+        quantize_model(model, backend)
+    else:
+        assert pretrained in [True, False]
+
+    if pretrained:
+        if quantize:
+            model_url = quant_model_urls['mobilenet_v2_' + backend]
+        else:
+            model_url = model_urls['mobilenet_v2']
+
+        state_dict = load_state_dict_from_url(model_url,
+                                              progress=progress)
+
+        model.load_state_dict(state_dict)
+    return model
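The quantizable variant above wraps _forward_impl with QuantStub/DeQuantStub and fuses Conv+BN(+ReLU) groups before conversion. A hedged usage sketch (not part of the patch), assuming a PyTorch build whose quantized engine supports the qnnpack backend required by the pretrained int8 checkpoint; the quantized model is CPU inference only:

import torch
from torchvision.models.quantization.mobilenetv2 import mobilenet_v2

# quantize=True downloads the qnnpack checkpoint and converts the model to int8.
model = mobilenet_v2(pretrained=True, quantize=True)
model.eval()
with torch.no_grad():
    out = model(torch.randn(1, 3, 224, 224))
print(out.shape)  # torch.Size([1, 1000])

# The float variant of the same class exposes fuse_model() for custom quantization flows:
float_model = mobilenet_v2(pretrained=True, quantize=False).eval()
float_model.fuse_model()  # fuses Conv/BN/ReLU triplets and the pointwise Conv+BN pairs in-place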