Closed
Description
🚀 Feature
Hi I want to write mobilenet fpn.
Motivation
Improve MaskRCNN speed and accuracy.
Pitch
Alternatives
Additional context
Code:
/torchvision/models/detection/backbone_utils.py
from collections import OrderedDict
from torch import nn
from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork, LastLevelMaxPool
from torchvision.ops import misc as misc_nn_ops
from .._utils import IntermediateLayerGetter
from .. import resnet
from .. import mobilenet_v2
from torchvision.models import mobilenet_v2 as MobileNetV2
class BackboneWithFPN(nn.Sequential):
def __init__(self, backbone, return_layers, in_channels_list, out_channels):
body = IntermediateLayerGetter(backbone, return_layers=return_layers)
fpn = FeaturePyramidNetwork(
in_channels_list=in_channels_list,
out_channels=out_channels,
extra_blocks=LastLevelMaxPool(),
)
super(BackboneWithFPN, self).__init__(OrderedDict(
[("body", body), ("fpn", fpn)]))
self.out_channels = out_channels
def resnet_fpn_backbone(backbone_name, pretrained):
backbone = resnet.__dict__[backbone_name](
pretrained=pretrained,
norm_layer=misc_nn_ops.FrozenBatchNorm2d)
# freeze layers
for name, parameter in backbone.named_parameters():
if 'layer2' not in name and 'layer3' not in name and 'layer4' not in name:
parameter.requires_grad_(False)
return_layers = {'layer1': 0, 'layer2': 1, 'layer3': 2, 'layer4': 3}
in_channels_stage2 = backbone.inplanes // 8
in_channels_list = [
in_channels_stage2,
in_channels_stage2 * 2,
in_channels_stage2 * 4,
in_channels_stage2 * 8,
]
out_channels = 256
return BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels)
class FPNMobileNet(nn.Module):
def __init__(self, pretrained=True):
super().__init__()
net = MobileNetV2(pretrained)
self.features = net.features
self.layer1= nn.Sequential(*self.features[0:4])
self.layer2 = nn.Sequential(*self.features[4:7])
self.layer3 = nn.Sequential(*self.features[7:11])
self.layer4 = nn.Sequential(*self.features[11:19])
for param in self.features.parameters():
param.requires_grad = False
def forward(self, x):
# Bottom-up pathway, from ResNet
enc0 = self.layer1(x)
enc1 = self.layer2(enc0) # 256
enc2 = self.layer3(enc1) # 512
enc3 = self.layer4(enc2) # 1024
return enc3
def mobilenet_fpn_backbone(pretrained):
backbone = FPNMobileNet(pretrained)
print(backbone)
# freeze layers
for name, parameter in backbone.named_parameters():
if 'layer2' not in name and 'layer3' not in name and 'layer4' not in name:
parameter.requires_grad_(False)
return_layers = {'layer1': 0, 'layer2': 1, 'layer3': 2, 'layer4': 3}
in_channels_stage2 =1280 // 8
in_channels_list = [
in_channels_stage2,
in_channels_stage2 * 2,
in_channels_stage2 * 4,
in_channels_stage2 * 8,
]
out_channels = 256
return BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels)
/torchvision/models/detection/mobilenet_fpn.py
from .backbone_utils import mobilenet_fpn_backbone
def fpn(pretrained = True):
backbone = mobilenet_fpn_backbone( pretrained)
return backbone
demo.py
backbone = mobilenet_fpn.fpn(True)
backbone.eval()
x = torch.rand(1,3, 100, 100)
out = backbone(x)
print(out)
Bug:
"RuntimeError: Given groups=1, weight of size 32 3 3 3, expected input[1, 1280, 4, 4] to have 3 channels, but got 1280 channels instead"