@@ -6,7 +6,7 @@
 from torch import Tensor
 from torch.nn.modules.batchnorm import BatchNorm2d
 from torch.nn.modules.instancenorm import InstanceNorm2d
-from torchvision.ops.misc import ConvNormActivation
+from torchvision.ops import Conv2dNormActivation
 
 from ..._internally_replaced_utils import load_state_dict_from_url
 from ...utils import _log_api_usage_once
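For context on the renamed block: `Conv2dNormActivation` (now exposed directly from `torchvision.ops`, replacing the old `torchvision.ops.misc.ConvNormActivation` import) is an `nn.Sequential` of a `Conv2d`, an optional norm layer, and an activation (ReLU by default), with padding defaulting to `(kernel_size - 1) // 2`. A minimal sketch of what it expands to, with hypothetical shapes mirroring the encoder stem changed below:

```python
import torch
from torchvision.ops import Conv2dNormActivation

# Conv2dNormActivation is an nn.Sequential: Conv2d -> norm layer -> activation.
# Padding defaults to (kernel_size - 1) // 2, so spatial size only changes via the stride.
stem = Conv2dNormActivation(3, 64, kernel_size=7, stride=2, norm_layer=torch.nn.InstanceNorm2d, bias=True)
x = torch.randn(1, 3, 224, 224)
print(stem(x).shape)                       # torch.Size([1, 64, 112, 112])
print([type(m).__name__ for m in stem])    # ['Conv2d', 'InstanceNorm2d', 'ReLU']
```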
@@ -38,17 +38,17 @@ def __init__(self, in_channels, out_channels, *, norm_layer, stride=1):
         # and frozen for the rest of the training process (i.e. set as eval()). The bias term is thus still useful
         # for the rest of the datasets. Technically, we could remove the bias for other norm layers like Instance norm
         # because these aren't frozen, but we don't bother (also, we woudn't be able to load the original weights).
-        self.convnormrelu1 = ConvNormActivation(
+        self.convnormrelu1 = Conv2dNormActivation(
             in_channels, out_channels, norm_layer=norm_layer, kernel_size=3, stride=stride, bias=True
         )
-        self.convnormrelu2 = ConvNormActivation(
+        self.convnormrelu2 = Conv2dNormActivation(
             out_channels, out_channels, norm_layer=norm_layer, kernel_size=3, bias=True
         )
 
         if stride == 1:
             self.downsample = nn.Identity()
         else:
-            self.downsample = ConvNormActivation(
+            self.downsample = Conv2dNormActivation(
                 in_channels,
                 out_channels,
                 norm_layer=norm_layer,
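The `bias=True` note above concerns fine-tuning with frozen BatchNorm: once the norm layer is kept in `eval()` with its parameters frozen, it acts as a fixed affine map, so the convolution's bias stays a useful trainable parameter. A rough illustration of that setup (not part of this diff; the values here are just for the example):

```python
import torch
from torch import nn
from torchvision.ops import Conv2dNormActivation

block = Conv2dNormActivation(32, 64, norm_layer=nn.BatchNorm2d, kernel_size=3, bias=True)
bn = block[1]                    # the BatchNorm2d inside the Sequential
bn.eval()                        # running statistics are no longer updated
for p in bn.parameters():
    p.requires_grad_(False)      # freeze the norm's affine weight and bias
# The Conv2d bias stays trainable and is not swallowed by a moving normalization:
assert block[0].bias.requires_grad
```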
@@ -77,21 +77,21 @@ def __init__(self, in_channels, out_channels, *, norm_layer, stride=1):
         super().__init__()
 
         # See note in ResidualBlock for the reason behind bias=True
-        self.convnormrelu1 = ConvNormActivation(
+        self.convnormrelu1 = Conv2dNormActivation(
             in_channels, out_channels // 4, norm_layer=norm_layer, kernel_size=1, bias=True
         )
-        self.convnormrelu2 = ConvNormActivation(
+        self.convnormrelu2 = Conv2dNormActivation(
             out_channels // 4, out_channels // 4, norm_layer=norm_layer, kernel_size=3, stride=stride, bias=True
         )
-        self.convnormrelu3 = ConvNormActivation(
+        self.convnormrelu3 = Conv2dNormActivation(
             out_channels // 4, out_channels, norm_layer=norm_layer, kernel_size=1, bias=True
         )
         self.relu = nn.ReLU(inplace=True)
 
         if stride == 1:
             self.downsample = nn.Identity()
         else:
-            self.downsample = ConvNormActivation(
+            self.downsample = Conv2dNormActivation(
                 in_channels,
                 out_channels,
                 norm_layer=norm_layer,
@@ -124,7 +124,9 @@ def __init__(self, *, block=ResidualBlock, layers=(64, 64, 96, 128, 256), norm_l
         assert len(layers) == 5
 
         # See note in ResidualBlock for the reason behind bias=True
-        self.convnormrelu = ConvNormActivation(3, layers[0], norm_layer=norm_layer, kernel_size=7, stride=2, bias=True)
+        self.convnormrelu = Conv2dNormActivation(
+            3, layers[0], norm_layer=norm_layer, kernel_size=7, stride=2, bias=True
+        )
 
         self.layer1 = self._make_2_blocks(block, layers[0], layers[1], norm_layer=norm_layer, first_stride=1)
         self.layer2 = self._make_2_blocks(block, layers[1], layers[2], norm_layer=norm_layer, first_stride=2)
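A note on the stride bookkeeping visible here (an inference from the code shown plus the downsampling factor referenced later in MaskPredictor, not something this hunk changes): the stem runs at stride 2 and `layer2` starts with a stride-2 block; assuming one more stride-2 stage further down in the encoder, the feature maps end up at 1/8 of the input resolution.

```python
# Hypothetical input side of 384: each stride-2 stage halves it.
size = 384
for stride in (2, 2, 2):   # stem, layer2, and an assumed later stride-2 stage (not shown in this hunk)
    size //= stride
print(size)                # 48 -> features at 1/8 of the input resolution
```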
@@ -170,17 +172,17 @@ def __init__(self, *, in_channels_corr, corr_layers=(256, 192), flow_layers=(128
         assert len(flow_layers) == 2
         assert len(corr_layers) in (1, 2)
 
-        self.convcorr1 = ConvNormActivation(in_channels_corr, corr_layers[0], norm_layer=None, kernel_size=1)
+        self.convcorr1 = Conv2dNormActivation(in_channels_corr, corr_layers[0], norm_layer=None, kernel_size=1)
         if len(corr_layers) == 2:
-            self.convcorr2 = ConvNormActivation(corr_layers[0], corr_layers[1], norm_layer=None, kernel_size=3)
+            self.convcorr2 = Conv2dNormActivation(corr_layers[0], corr_layers[1], norm_layer=None, kernel_size=3)
         else:
             self.convcorr2 = nn.Identity()
 
-        self.convflow1 = ConvNormActivation(2, flow_layers[0], norm_layer=None, kernel_size=7)
-        self.convflow2 = ConvNormActivation(flow_layers[0], flow_layers[1], norm_layer=None, kernel_size=3)
+        self.convflow1 = Conv2dNormActivation(2, flow_layers[0], norm_layer=None, kernel_size=7)
+        self.convflow2 = Conv2dNormActivation(flow_layers[0], flow_layers[1], norm_layer=None, kernel_size=3)
 
         # out_channels - 2 because we cat the flow (2 channels) at the end
-        self.conv = ConvNormActivation(
+        self.conv = Conv2dNormActivation(
             corr_layers[-1] + flow_layers[-1], out_channels - 2, norm_layer=None, kernel_size=3
         )
 
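A small sanity check on the `out_channels - 2` comment above (the forward pass is not part of this hunk, so the tensors below are purely illustrative): the final conv emits `out_channels - 2` feature channels, and concatenating the 2-channel flow back on brings the motion features to exactly `out_channels` channels.

```python
import torch

out_channels = 128                                  # illustrative value
feat = torch.randn(1, out_channels - 2, 47, 64)     # hypothetical output of self.conv
flow = torch.randn(1, 2, 47, 64)                    # the current 2-channel flow estimate
motion_features = torch.cat([feat, flow], dim=1)
assert motion_features.shape[1] == out_channels
```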
@@ -301,7 +303,7 @@ class MaskPredictor(nn.Module):
 
     def __init__(self, *, in_channels, hidden_size, multiplier=0.25):
         super().__init__()
-        self.convrelu = ConvNormActivation(in_channels, hidden_size, norm_layer=None, kernel_size=3)
+        self.convrelu = Conv2dNormActivation(in_channels, hidden_size, norm_layer=None, kernel_size=3)
         # 8 * 8 * 9 because the predicted flow is downsampled by 8, from the downsampling of the initial FeatureEncoder
         # and we interpolate with all 9 surrounding neighbors. See paper and appendix B.
         self.conv = nn.Conv2d(hidden_size, 8 * 8 * 9, 1, padding=0)
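The 8 * 8 * 9 = 576 output channels can be read as: for every coarse pixel, an 8x8 block of fine pixels, each getting 9 weights over its 3x3 coarse neighborhood (the convex-combination upsampling described in the RAFT paper). A shape-only sketch of that interpretation, with made-up spatial sizes and no claim about the exact reshape the model's forward uses:

```python
import torch

mask = torch.randn(1, 8 * 8 * 9, 47, 64)    # raw output of self.conv
mask = mask.view(1, 9, 8, 8, 47, 64)         # 9 neighbor weights per fine pixel in each 8x8 block
weights = mask.softmax(dim=1)                # convex combination over the 3x3 coarse neighborhood
assert torch.allclose(weights.sum(dim=1), torch.ones_like(weights.sum(dim=1)))
```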