|
6 | 6 | from torch import Tensor |
7 | 7 | from torch.nn.modules.batchnorm import BatchNorm2d |
8 | 8 | from torch.nn.modules.instancenorm import InstanceNorm2d |
9 | | -from torchvision.ops.misc import ConvNormActivation |
| 9 | +from torchvision.ops import Conv2dNormActivation |
10 | 10 |
|
11 | 11 | from ..._internally_replaced_utils import load_state_dict_from_url |
12 | 12 | from ...utils import _log_api_usage_once |
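For context on the rename above: `Conv2dNormActivation` lives in `torchvision.ops` and is the 2D-specific successor to the generic `ConvNormActivation` helper. It stacks a `Conv2d`, an optional norm layer, and an activation (ReLU by default) into one `nn.Sequential`, and the constructor arguments used in this file carry over unchanged, so the rest of the diff is a pure rename. A minimal sketch of the new usage, mirroring the arguments in the hunks below (the channel sizes and input tensor are illustrative, not taken from this file):

```python
import torch
from torch.nn.modules.instancenorm import InstanceNorm2d
from torchvision.ops import Conv2dNormActivation  # new import path; was torchvision.ops.misc.ConvNormActivation

# Conv2d -> InstanceNorm2d -> ReLU, built as a single nn.Sequential.
block = Conv2dNormActivation(64, 96, norm_layer=InstanceNorm2d, kernel_size=3, stride=2, bias=True)

x = torch.rand(1, 64, 128, 128)  # illustrative input
print(block(x).shape)            # torch.Size([1, 96, 64, 64]) -- stride 2 halves H and W
```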
@@ -38,17 +38,17 @@ def __init__(self, in_channels, out_channels, *, norm_layer, stride=1): |
38 | 38 | # and frozen for the rest of the training process (i.e. set as eval()). The bias term is thus still useful |
39 | 39 | # for the rest of the datasets. Technically, we could remove the bias for other norm layers like Instance norm |
40 | 40 | # because these aren't frozen, but we don't bother (also, we wouldn't be able to load the original weights). |
41 | | - self.convnormrelu1 = ConvNormActivation( |
| 41 | + self.convnormrelu1 = Conv2dNormActivation( |
42 | 42 | in_channels, out_channels, norm_layer=norm_layer, kernel_size=3, stride=stride, bias=True |
43 | 43 | ) |
44 | | - self.convnormrelu2 = ConvNormActivation( |
| 44 | + self.convnormrelu2 = Conv2dNormActivation( |
45 | 45 | out_channels, out_channels, norm_layer=norm_layer, kernel_size=3, bias=True |
46 | 46 | ) |
47 | 47 |
|
48 | 48 | if stride == 1: |
49 | 49 | self.downsample = nn.Identity() |
50 | 50 | else: |
51 | | - self.downsample = ConvNormActivation( |
| 51 | + self.downsample = Conv2dNormActivation( |
52 | 52 | in_channels, |
53 | 53 | out_channels, |
54 | 54 | norm_layer=norm_layer, |
@@ -77,21 +77,21 @@ def __init__(self, in_channels, out_channels, *, norm_layer, stride=1): |
77 | 77 | super().__init__() |
78 | 78 |
|
79 | 79 | # See note in ResidualBlock for the reason behind bias=True |
80 | | - self.convnormrelu1 = ConvNormActivation( |
| 80 | + self.convnormrelu1 = Conv2dNormActivation( |
81 | 81 | in_channels, out_channels // 4, norm_layer=norm_layer, kernel_size=1, bias=True |
82 | 82 | ) |
83 | | - self.convnormrelu2 = ConvNormActivation( |
| 83 | + self.convnormrelu2 = Conv2dNormActivation( |
84 | 84 | out_channels // 4, out_channels // 4, norm_layer=norm_layer, kernel_size=3, stride=stride, bias=True |
85 | 85 | ) |
86 | | - self.convnormrelu3 = ConvNormActivation( |
| 86 | + self.convnormrelu3 = Conv2dNormActivation( |
87 | 87 | out_channels // 4, out_channels, norm_layer=norm_layer, kernel_size=1, bias=True |
88 | 88 | ) |
89 | 89 | self.relu = nn.ReLU(inplace=True) |
90 | 90 |
|
91 | 91 | if stride == 1: |
92 | 92 | self.downsample = nn.Identity() |
93 | 93 | else: |
94 | | - self.downsample = ConvNormActivation( |
| 94 | + self.downsample = Conv2dNormActivation( |
95 | 95 | in_channels, |
96 | 96 | out_channels, |
97 | 97 | norm_layer=norm_layer, |
@@ -124,7 +124,9 @@ def __init__(self, *, block=ResidualBlock, layers=(64, 64, 96, 128, 256), norm_l |
124 | 124 | assert len(layers) == 5 |
125 | 125 |
|
126 | 126 | # See note in ResidualBlock for the reason behind bias=True |
127 | | - self.convnormrelu = ConvNormActivation(3, layers[0], norm_layer=norm_layer, kernel_size=7, stride=2, bias=True) |
| 127 | + self.convnormrelu = Conv2dNormActivation( |
| 128 | + 3, layers[0], norm_layer=norm_layer, kernel_size=7, stride=2, bias=True |
| 129 | + ) |
128 | 130 |
|
129 | 131 | self.layer1 = self._make_2_blocks(block, layers[0], layers[1], norm_layer=norm_layer, first_stride=1) |
130 | 132 | self.layer2 = self._make_2_blocks(block, layers[1], layers[2], norm_layer=norm_layer, first_stride=2) |
@@ -170,17 +172,17 @@ def __init__(self, *, in_channels_corr, corr_layers=(256, 192), flow_layers=(128 |
170 | 172 | assert len(flow_layers) == 2 |
171 | 173 | assert len(corr_layers) in (1, 2) |
172 | 174 |
|
173 | | - self.convcorr1 = ConvNormActivation(in_channels_corr, corr_layers[0], norm_layer=None, kernel_size=1) |
| 175 | + self.convcorr1 = Conv2dNormActivation(in_channels_corr, corr_layers[0], norm_layer=None, kernel_size=1) |
174 | 176 | if len(corr_layers) == 2: |
175 | | - self.convcorr2 = ConvNormActivation(corr_layers[0], corr_layers[1], norm_layer=None, kernel_size=3) |
| 177 | + self.convcorr2 = Conv2dNormActivation(corr_layers[0], corr_layers[1], norm_layer=None, kernel_size=3) |
176 | 178 | else: |
177 | 179 | self.convcorr2 = nn.Identity() |
178 | 180 |
|
179 | | - self.convflow1 = ConvNormActivation(2, flow_layers[0], norm_layer=None, kernel_size=7) |
180 | | - self.convflow2 = ConvNormActivation(flow_layers[0], flow_layers[1], norm_layer=None, kernel_size=3) |
| 181 | + self.convflow1 = Conv2dNormActivation(2, flow_layers[0], norm_layer=None, kernel_size=7) |
| 182 | + self.convflow2 = Conv2dNormActivation(flow_layers[0], flow_layers[1], norm_layer=None, kernel_size=3) |
181 | 183 |
|
182 | 184 | # out_channels - 2 because we cat the flow (2 channels) at the end |
183 | | - self.conv = ConvNormActivation( |
| 185 | + self.conv = Conv2dNormActivation( |
184 | 186 | corr_layers[-1] + flow_layers[-1], out_channels - 2, norm_layer=None, kernel_size=3 |
185 | 187 | ) |
186 | 188 |
|
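A quick sketch of what the `out_channels - 2` comment above means: the last conv deliberately leaves 2 channels free because the 2-channel flow estimate is concatenated onto its output, so the module still returns `out_channels` channels overall (shapes below are illustrative):

```python
import torch

out_channels = 128                                   # illustrative value
conv_out = torch.rand(1, out_channels - 2, 32, 32)   # output of the conv defined above
flow = torch.rand(1, 2, 32, 32)                      # the 2-channel flow that gets concatenated
print(torch.cat([conv_out, flow], dim=1).shape)      # torch.Size([1, 128, 32, 32])
```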
@@ -301,7 +303,7 @@ class MaskPredictor(nn.Module): |
301 | 303 |
|
302 | 304 | def __init__(self, *, in_channels, hidden_size, multiplier=0.25): |
303 | 305 | super().__init__() |
304 | | - self.convrelu = ConvNormActivation(in_channels, hidden_size, norm_layer=None, kernel_size=3) |
| 306 | + self.convrelu = Conv2dNormActivation(in_channels, hidden_size, norm_layer=None, kernel_size=3) |
305 | 307 | # 8 * 8 * 9 because the predicted flow is downsampled by 8, from the downsampling of the initial FeatureEncoder |
306 | 308 | # and we interpolate with all 9 surrounding neighbors. See paper and appendix B. |
307 | 309 | self.conv = nn.Conv2d(hidden_size, 8 * 8 * 9, 1, padding=0) |
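To make the `8 * 8 * 9` comment concrete: the flow is predicted at 1/8 of the input resolution, and for every position of the 8x8 patch that each coarse pixel is upsampled to, the mask supplies interpolation weights over the 9 surrounding neighbors, hence 8 * 8 * 9 = 576 output channels. A hedged sketch of the two layers as wired above (the input channel count, hidden size, and feature-map size are illustrative):

```python
import torch
from torch import nn
from torchvision.ops import Conv2dNormActivation

in_channels, hidden_size = 128, 256           # illustrative values
convrelu = Conv2dNormActivation(in_channels, hidden_size, norm_layer=None, kernel_size=3)  # conv + ReLU, no norm
conv = nn.Conv2d(hidden_size, 8 * 8 * 9, 1, padding=0)

feats = torch.rand(1, in_channels, 32, 32)    # features at 1/8 resolution (illustrative size)
mask = conv(convrelu(feats))
print(mask.shape)                             # torch.Size([1, 576, 32, 32]) -- 9 weights per 8x8 sub-position
```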
|