From 2a901ab35d593ba2150d2070e25573749f35e41b Mon Sep 17 00:00:00 2001 From: frgfm Date: Thu, 29 Jul 2021 19:47:45 +0200 Subject: [PATCH 01/13] style: Added typing to models/video --- torchvision/models/video/resnet.py | 113 +++++++++++++++++++---------- 1 file changed, 74 insertions(+), 39 deletions(-) diff --git a/torchvision/models/video/resnet.py b/torchvision/models/video/resnet.py index fc69188ef7a..4b401b2fa5e 100644 --- a/torchvision/models/video/resnet.py +++ b/torchvision/models/video/resnet.py @@ -1,4 +1,6 @@ +from torch import Tensor import torch.nn as nn +from typing import Tuple, Optional, Callable, List, Type, Any, Union from ..._internally_replaced_utils import load_state_dict_from_url @@ -13,12 +15,14 @@ class Conv3DSimple(nn.Conv3d): - def __init__(self, - in_planes, - out_planes, - midplanes=None, - stride=1, - padding=1): + def __init__( + self, + in_planes: int, + out_planes: int, + midplanes: Optional[int] = None, + stride: int = 1, + padding: int = 1 + ) -> None: super(Conv3DSimple, self).__init__( in_channels=in_planes, @@ -29,18 +33,20 @@ def __init__(self, bias=False) @staticmethod - def get_downsample_stride(stride): + def get_downsample_stride(stride: int) -> Tuple[int, int, int]: return stride, stride, stride class Conv2Plus1D(nn.Sequential): - def __init__(self, - in_planes, - out_planes, - midplanes, - stride=1, - padding=1): + def __init__( + self, + in_planes: int, + out_planes: int, + midplanes: int, + stride: int = 1, + padding: int = 1 + ) -> None: super(Conv2Plus1D, self).__init__( nn.Conv3d(in_planes, midplanes, kernel_size=(1, 3, 3), stride=(1, stride, stride), padding=(0, padding, padding), @@ -52,18 +58,20 @@ def __init__(self, bias=False)) @staticmethod - def get_downsample_stride(stride): + def get_downsample_stride(stride: int) -> Tuple[int, int, int]: return stride, stride, stride class Conv3DNoTemporal(nn.Conv3d): - def __init__(self, - in_planes, - out_planes, - midplanes=None, - stride=1, - padding=1): + def __init__( 
+ self, + in_planes: int, + out_planes: int, + midplanes: Optional[int] = None, + stride: int = 1, + padding: int = 1 + ) -> None: super(Conv3DNoTemporal, self).__init__( in_channels=in_planes, @@ -74,7 +82,7 @@ def __init__(self, bias=False) @staticmethod - def get_downsample_stride(stride): + def get_downsample_stride(stride: int) -> Tuple[int, int, int]: return 1, stride, stride @@ -82,7 +90,14 @@ class BasicBlock(nn.Module): expansion = 1 - def __init__(self, inplanes, planes, conv_builder, stride=1, downsample=None): + def __init__( + self, + inplanes: int, + planes: int, + conv_builder: Callable, + stride: int = 1, + downsample: Optional[nn.Module] = None, + ) -> None: midplanes = (inplanes * planes * 3 * 3 * 3) // (inplanes * 3 * 3 + 3 * planes) super(BasicBlock, self).__init__() @@ -99,7 +114,7 @@ def __init__(self, inplanes, planes, conv_builder, stride=1, downsample=None): self.downsample = downsample self.stride = stride - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: residual = x out = self.conv1(x) @@ -116,7 +131,14 @@ def forward(self, x): class Bottleneck(nn.Module): expansion = 4 - def __init__(self, inplanes, planes, conv_builder, stride=1, downsample=None): + def __init__( + self, + inplanes: int, + planes: int, + conv_builder: Callable, + stride: int = 1, + downsample: Optional[nn.Module] = None, + ) -> None: super(Bottleneck, self).__init__() midplanes = (inplanes * planes * 3 * 3 * 3) // (inplanes * 3 * 3 + 3 * planes) @@ -143,7 +165,7 @@ def __init__(self, inplanes, planes, conv_builder, stride=1, downsample=None): self.downsample = downsample self.stride = stride - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: residual = x out = self.conv1(x) @@ -162,7 +184,7 @@ def forward(self, x): class BasicStem(nn.Sequential): """The default conv-batchnorm-relu stem """ - def __init__(self): + def __init__(self) -> None: super(BasicStem, self).__init__( nn.Conv3d(3, 64, kernel_size=(3, 7, 7), stride=(1, 2, 2), 
padding=(1, 3, 3), bias=False), @@ -173,7 +195,7 @@ def __init__(self): class R2Plus1dStem(nn.Sequential): """R(2+1)D stem is different than the default one as it uses separated 3D convolution """ - def __init__(self): + def __init__(self) -> None: super(R2Plus1dStem, self).__init__( nn.Conv3d(3, 45, kernel_size=(1, 7, 7), stride=(1, 2, 2), padding=(0, 3, 3), @@ -189,9 +211,15 @@ def __init__(self): class VideoResNet(nn.Module): - def __init__(self, block, conv_makers, layers, - stem, num_classes=400, - zero_init_residual=False): + def __init__( + self, + block: Type[Union[BasicBlock, Bottleneck]], + conv_makers: List[Type[Union[Conv3DSimple, Conv3DNoTemporal]]], + layers: List[int], + stem: nn.Module, + num_classes: int = 400, + zero_init_residual: bool = False, + ) -> None: """Generic resnet video generator. Args: @@ -221,9 +249,9 @@ def __init__(self, block, conv_makers, layers, if zero_init_residual: for m in self.modules(): if isinstance(m, Bottleneck): - nn.init.constant_(m.bn3.weight, 0) + nn.init.constant_(m.bn3.weight, 0) # type: ignore[union-attr, arg-type] - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: x = self.stem(x) x = self.layer1(x) @@ -238,7 +266,14 @@ def forward(self, x): return x - def _make_layer(self, block, conv_builder, planes, blocks, stride=1): + def _make_layer( + self, + block: Type[Union[BasicBlock, Bottleneck]], + conv_builder: Type[Union[Conv3DSimple, Conv3DNoTemporal]], + planes: int, + blocks: int, + stride: int = 1 + ) -> nn.Sequential: downsample = None if stride != 1 or self.inplanes != planes * block.expansion: @@ -257,7 +292,7 @@ def _make_layer(self, block, conv_builder, planes, blocks, stride=1): return nn.Sequential(*layers) - def _initialize_weights(self): + def _initialize_weights(self) -> None: for m in self.modules(): if isinstance(m, nn.Conv3d): nn.init.kaiming_normal_(m.weight, mode='fan_out', @@ -272,7 +307,7 @@ def _initialize_weights(self): nn.init.constant_(m.bias, 0) -def _video_resnet(arch, 
pretrained=False, progress=True, **kwargs): +def _video_resnet(arch: str, pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VideoResNet: model = VideoResNet(**kwargs) if pretrained: @@ -282,7 +317,7 @@ def _video_resnet(arch, pretrained=False, progress=True, **kwargs): return model -def r3d_18(pretrained=False, progress=True, **kwargs): +def r3d_18(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VideoResNet: """Construct 18 layer Resnet3D model as in https://arxiv.org/abs/1711.11248 @@ -302,7 +337,7 @@ def r3d_18(pretrained=False, progress=True, **kwargs): stem=BasicStem, **kwargs) -def mc3_18(pretrained=False, progress=True, **kwargs): +def mc3_18(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VideoResNet: """Constructor for 18 layer Mixed Convolution network as in https://arxiv.org/abs/1711.11248 @@ -316,12 +351,12 @@ def mc3_18(pretrained=False, progress=True, **kwargs): return _video_resnet('mc3_18', pretrained, progress, block=BasicBlock, - conv_makers=[Conv3DSimple] + [Conv3DNoTemporal] * 3, + conv_makers=[Conv3DSimple] + [Conv3DNoTemporal] * 3, # type: ignore[list-item] layers=[2, 2, 2, 2], stem=BasicStem, **kwargs) -def r2plus1d_18(pretrained=False, progress=True, **kwargs): +def r2plus1d_18(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VideoResNet: """Constructor for the 18 layer deep R(2+1)D network as in https://arxiv.org/abs/1711.11248 From 2014481bb9f680d6c332934f6c2ec69d9f8cd926 Mon Sep 17 00:00:00 2001 From: frgfm Date: Mon, 23 Aug 2021 10:19:46 +0200 Subject: [PATCH 02/13] style: Fixed typing --- torchvision/models/video/resnet.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/torchvision/models/video/resnet.py b/torchvision/models/video/resnet.py index 4b401b2fa5e..8af92a5894e 100644 --- a/torchvision/models/video/resnet.py +++ b/torchvision/models/video/resnet.py @@ -213,8 +213,8 @@ class VideoResNet(nn.Module): def __init__( self, - block: 
Type[Union[BasicBlock, Bottleneck]], - conv_makers: List[Type[Union[Conv3DSimple, Conv3DNoTemporal]]], + block: Type[nn.Module], + conv_makers: List[Type[Union[Conv3DSimple, Conv3DNoTemporal, Conv2Plus1D]]], layers: List[int], stem: nn.Module, num_classes: int = 400, @@ -268,8 +268,8 @@ def forward(self, x: Tensor) -> Tensor: def _make_layer( self, - block: Type[Union[BasicBlock, Bottleneck]], - conv_builder: Type[Union[Conv3DSimple, Conv3DNoTemporal]], + block: Type[nn.Module], + conv_builder: Type[Union[Conv3DSimple, Conv3DNoTemporal, Conv2Plus1D]], planes: int, blocks: int, stride: int = 1 From 4381c97ccba190a9fcf676f09e812f31c8dc6614 Mon Sep 17 00:00:00 2001 From: frgfm Date: Mon, 23 Aug 2021 10:20:25 +0200 Subject: [PATCH 03/13] style: Fixed typing --- torchvision/models/video/resnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/models/video/resnet.py b/torchvision/models/video/resnet.py index 8af92a5894e..5ab70b99795 100644 --- a/torchvision/models/video/resnet.py +++ b/torchvision/models/video/resnet.py @@ -216,7 +216,7 @@ def __init__( block: Type[nn.Module], conv_makers: List[Type[Union[Conv3DSimple, Conv3DNoTemporal, Conv2Plus1D]]], layers: List[int], - stem: nn.Module, + stem: nn.Module = BasicStem, num_classes: int = 400, zero_init_residual: bool = False, ) -> None: From 640a1d7db6c1a4353e4c2eb0e5ade3ef242fe33b Mon Sep 17 00:00:00 2001 From: frgfm Date: Mon, 23 Aug 2021 10:24:41 +0200 Subject: [PATCH 04/13] style: Fixed typing --- torchvision/models/video/resnet.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/torchvision/models/video/resnet.py b/torchvision/models/video/resnet.py index 5ab70b99795..b1dc56e1c50 100644 --- a/torchvision/models/video/resnet.py +++ b/torchvision/models/video/resnet.py @@ -213,10 +213,10 @@ class VideoResNet(nn.Module): def __init__( self, - block: Type[nn.Module], + block: Type[Union[BasicBlock, Bottleneck]], conv_makers: List[Type[Union[Conv3DSimple, 
Conv3DNoTemporal, Conv2Plus1D]]], layers: List[int], - stem: nn.Module = BasicStem, + stem: Type[nn.Module] = BasicStem, num_classes: int = 400, zero_init_residual: bool = False, ) -> None: @@ -268,7 +268,7 @@ def forward(self, x: Tensor) -> Tensor: def _make_layer( self, - block: Type[nn.Module], + block: Type[Union[BasicBlock, Bottleneck]], conv_builder: Type[Union[Conv3DSimple, Conv3DNoTemporal, Conv2Plus1D]], planes: int, blocks: int, From 1de780eeb22544d6bc69eaeabaa76ad3cb6476ea Mon Sep 17 00:00:00 2001 From: frgfm Date: Mon, 23 Aug 2021 11:37:18 +0200 Subject: [PATCH 05/13] refactor: Removed default value for stem --- torchvision/models/video/resnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/models/video/resnet.py b/torchvision/models/video/resnet.py index b1dc56e1c50..ed85d89a8ff 100644 --- a/torchvision/models/video/resnet.py +++ b/torchvision/models/video/resnet.py @@ -216,7 +216,7 @@ def __init__( block: Type[Union[BasicBlock, Bottleneck]], conv_makers: List[Type[Union[Conv3DSimple, Conv3DNoTemporal, Conv2Plus1D]]], layers: List[int], - stem: Type[nn.Module] = BasicStem, + stem: Type[nn.Module], num_classes: int = 400, zero_init_residual: bool = False, ) -> None: From 8260f6129308ccbff5128f68fc532934c3a20ec8 Mon Sep 17 00:00:00 2001 From: frgfm Date: Mon, 23 Aug 2021 11:37:29 +0200 Subject: [PATCH 06/13] docs: Fixed docstring of VideoResNet --- torchvision/models/video/resnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/models/video/resnet.py b/torchvision/models/video/resnet.py index ed85d89a8ff..f4578f6c253 100644 --- a/torchvision/models/video/resnet.py +++ b/torchvision/models/video/resnet.py @@ -226,7 +226,7 @@ def __init__( block (nn.Module): resnet building block conv_makers (list(functions)): generator function for each layer layers (List[int]): number of blocks per layer - stem (nn.Module, optional): Resnet stem, if None, defaults to conv-bn-relu. Defaults to None. 
+ stem (Type[nn.Module]): module class to instantiate the ResNet stem. num_classes (int, optional): Dimension of the final FC layer. Defaults to 400. zero_init_residual (bool, optional): Zero init bottleneck residual BN. Defaults to False. """ From bd05b074f6fb524bce3f0385381c96a2b5567cca Mon Sep 17 00:00:00 2001 From: frgfm Date: Mon, 23 Aug 2021 12:52:13 +0200 Subject: [PATCH 07/13] style: Refactored typing --- torchvision/models/video/resnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/models/video/resnet.py b/torchvision/models/video/resnet.py index f4578f6c253..566cff48f72 100644 --- a/torchvision/models/video/resnet.py +++ b/torchvision/models/video/resnet.py @@ -216,7 +216,7 @@ def __init__( block: Type[Union[BasicBlock, Bottleneck]], conv_makers: List[Type[Union[Conv3DSimple, Conv3DNoTemporal, Conv2Plus1D]]], layers: List[int], - stem: Type[nn.Module], + stem: Callable[..., nn.Module], num_classes: int = 400, zero_init_residual: bool = False, ) -> None: From 5841a2bfb8ff7e31a63b1f718bb9c43f1c434dc4 Mon Sep 17 00:00:00 2001 From: frgfm Date: Mon, 23 Aug 2021 12:52:21 +0200 Subject: [PATCH 08/13] docs: Fixed docstring --- torchvision/models/video/resnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/models/video/resnet.py b/torchvision/models/video/resnet.py index 566cff48f72..ea8c0a8a571 100644 --- a/torchvision/models/video/resnet.py +++ b/torchvision/models/video/resnet.py @@ -226,7 +226,7 @@ def __init__( block (nn.Module): resnet building block conv_makers (list(functions)): generator function for each layer layers (List[int]): number of blocks per layer - stem (Type[nn.Module]): module class to instantiate the ResNet stem. + stem (Callable[..., nn.Module]): module specifying the ResNet stem. num_classes (int, optional): Dimension of the final FC layer. Defaults to 400. zero_init_residual (bool, optional): Zero init bottleneck residual BN. Defaults to False. 
""" From 536dc5a21be2ae7c0fea8777875f17220cdfce78 Mon Sep 17 00:00:00 2001 From: frgfm Date: Mon, 23 Aug 2021 14:21:07 +0200 Subject: [PATCH 09/13] style: Fixed typing --- torchvision/models/video/resnet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/torchvision/models/video/resnet.py b/torchvision/models/video/resnet.py index ea8c0a8a571..bbd0e9a3dec 100644 --- a/torchvision/models/video/resnet.py +++ b/torchvision/models/video/resnet.py @@ -94,7 +94,7 @@ def __init__( self, inplanes: int, planes: int, - conv_builder: Callable, + conv_builder: Callable[..., nn.Module], stride: int = 1, downsample: Optional[nn.Module] = None, ) -> None: @@ -135,7 +135,7 @@ def __init__( self, inplanes: int, planes: int, - conv_builder: Callable, + conv_builder: Callable[..., nn.Module], stride: int = 1, downsample: Optional[nn.Module] = None, ) -> None: From d238b235cdcdf83b6a588f64128a83d6e484c7ca Mon Sep 17 00:00:00 2001 From: frgfm Date: Mon, 23 Aug 2021 14:21:16 +0200 Subject: [PATCH 10/13] docs: Specified docstring --- torchvision/models/video/resnet.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/torchvision/models/video/resnet.py b/torchvision/models/video/resnet.py index bbd0e9a3dec..c494e5115d8 100644 --- a/torchvision/models/video/resnet.py +++ b/torchvision/models/video/resnet.py @@ -224,7 +224,8 @@ def __init__( Args: block (nn.Module): resnet building block - conv_makers (list(functions)): generator function for each layer + conv_makers (List[Type[Union[Conv3DSimple, Conv3DNoTemporal, Conv2Plus1D]]]): + generator function for each layer layers (List[int]): number of blocks per layer stem (Callable[..., nn.Module]): module specifying the ResNet stem. num_classes (int, optional): Dimension of the final FC layer. Defaults to 400. 
From 07ebd581a39085e9413d4cc8363ffc5632c8c536 Mon Sep 17 00:00:00 2001 From: frgfm Date: Mon, 23 Aug 2021 15:36:24 +0200 Subject: [PATCH 11/13] typing: Fixed typing --- torchvision/models/video/resnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/models/video/resnet.py b/torchvision/models/video/resnet.py index c494e5115d8..56fb0bc8eaf 100644 --- a/torchvision/models/video/resnet.py +++ b/torchvision/models/video/resnet.py @@ -213,7 +213,7 @@ class VideoResNet(nn.Module): def __init__( self, - block: Type[Union[BasicBlock, Bottleneck]], + block: Callable[..., Union[BasicBlock, Bottleneck]], conv_makers: List[Type[Union[Conv3DSimple, Conv3DNoTemporal, Conv2Plus1D]]], layers: List[int], stem: Callable[..., nn.Module], From 0ec53589127d14aab86cb74c94f80201706423a1 Mon Sep 17 00:00:00 2001 From: frgfm Date: Mon, 23 Aug 2021 15:36:31 +0200 Subject: [PATCH 12/13] docs: Fixed docstring --- torchvision/models/video/resnet.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/torchvision/models/video/resnet.py b/torchvision/models/video/resnet.py index 56fb0bc8eaf..1c3be2ff208 100644 --- a/torchvision/models/video/resnet.py +++ b/torchvision/models/video/resnet.py @@ -223,9 +223,9 @@ def __init__( """Generic resnet video generator. Args: - block (nn.Module): resnet building block - conv_makers (List[Type[Union[Conv3DSimple, Conv3DNoTemporal, Conv2Plus1D]]]): - generator function for each layer + block (Callable[..., Union[BasicBlock, Bottleneck]])): resnet building block + conv_makers (List[Type[Union[Conv3DSimple, Conv3DNoTemporal, Conv2Plus1D]]]): generator + function for each layer layers (List[int]): number of blocks per layer stem (Callable[..., nn.Module]): module specifying the ResNet stem. num_classes (int, optional): Dimension of the final FC layer. Defaults to 400. 
From 2c7a8240e9eb1ba47a095aebc9f2ad05e585d92e Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Mon, 23 Aug 2021 15:05:45 +0100 Subject: [PATCH 13/13] Undoing change. --- torchvision/models/video/resnet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/torchvision/models/video/resnet.py b/torchvision/models/video/resnet.py index 1c3be2ff208..faf3b3bc4a8 100644 --- a/torchvision/models/video/resnet.py +++ b/torchvision/models/video/resnet.py @@ -213,7 +213,7 @@ class VideoResNet(nn.Module): def __init__( self, - block: Callable[..., Union[BasicBlock, Bottleneck]], + block: Type[Union[BasicBlock, Bottleneck]], conv_makers: List[Type[Union[Conv3DSimple, Conv3DNoTemporal, Conv2Plus1D]]], layers: List[int], stem: Callable[..., nn.Module], @@ -223,7 +223,7 @@ def __init__( """Generic resnet video generator. Args: - block (Callable[..., Union[BasicBlock, Bottleneck]])): resnet building block + block (Type[Union[BasicBlock, Bottleneck]]): resnet building block conv_makers (List[Type[Union[Conv3DSimple, Conv3DNoTemporal, Conv2Plus1D]]]): generator function for each layer layers (List[int]): number of blocks per layer