pytorch
diff --git a/‎docs/requirements.txt‎
Lines changed: 1 addition & 1 deletion b/‎docs/requirements.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/source/ops.rst‎
Lines changed: 3 additions & 0 deletions b/‎docs/source/ops.rst‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎mypy.ini‎
Lines changed: 0 additions & 4 deletions b/‎mypy.ini‎
Lines changed: 0 additions & 4 deletions
diff --git a/‎references/classification/README.md‎
Lines changed: 14 additions & 9 deletions b/‎references/classification/README.md‎
Lines changed: 14 additions & 9 deletions
diff --git a/‎references/classification/train.py‎
Lines changed: 1 addition & 1 deletion b/‎references/classification/train.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎references/classification/train_quantization.py‎
Lines changed: 15 additions & 1 deletion b/‎references/classification/train_quantization.py‎
Lines changed: 15 additions & 1 deletion
diff --git a/‎test/test_prototype_models.py‎
Lines changed: 19 additions & 4 deletions b/‎test/test_prototype_models.py‎
Lines changed: 19 additions & 4 deletions
diff --git a/‎torchvision/models/detection/generalized_rcnn.py‎
Lines changed: 14 additions & 7 deletions b/‎torchvision/models/detection/generalized_rcnn.py‎
Lines changed: 14 additions & 7 deletions
diff --git a/‎torchvision/ops/__init__.py‎
Lines changed: 4 additions & 0 deletions b/‎torchvision/ops/__init__.py‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎torchvision/ops/misc.py‎
Lines changed: 33 additions & 12 deletions b/‎torchvision/ops/misc.py‎
Lines changed: 33 additions & 12 deletions
@@ -3,4 +3,4 @@ numpy
 sphinx-copybutton>=0.3.1
 sphinx-gallery>=0.9.0
 sphinx==3.5.4
--e git+git://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
+-e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
@@ -43,3 +43,6 @@ Operators
     MultiScaleRoIAlign
     FeaturePyramidNetwork
     StochasticDepth
+    FrozenBatchNorm2d
+    ConvNormActivation
+    SqueezeExcitation
@@ -46,10 +46,6 @@ ignore_errors = True
 
 ignore_errors = True
 
-[mypy-torchvision.models.detection.generalized_rcnn]
-
-ignore_errors = True
-
 [mypy-torchvision.models.detection.faster_rcnn]
 
 ignore_errors = True
 
@@ -42,20 +42,20 @@ torchrun --nproc_per_node=8 train.py --model inception_v3\
       --val-resize-size 342 --val-crop-size 299 --train-crop-size 299 --test-only --pretrained
 ```
 
-### ResNext-50 32x4d
+### ResNet
 ```
-torchrun --nproc_per_node=8 train.py\
-    --model resnext50_32x4d --epochs 100
+torchrun --nproc_per_node=8 train.py --model $MODEL
 ```
 
+Here `$MODEL` is one of `resnet18`, `resnet34`, `resnet50`, `resnet101` or `resnet152`.
 
-### ResNext-101 32x8d
-
+### ResNext
 ```
 torchrun --nproc_per_node=8 train.py\
-    --model resnext101_32x8d --epochs 100
+    --model $MODEL --epochs 100
 ```
 
+Here `$MODEL` is one of `resnext50_32x4d` or `resnext101_32x8d`.
 Note that the above command corresponds to a single node with 8 GPUs. If you use
 a different number of GPUs and/or a different batch size, then the learning rate
 should be scaled accordingly. For example, the pretrained model provided by
@@ -151,9 +151,9 @@ torchrun --nproc_per_node=8 train.py\
 
 ## Quantized
 
-### Parameters used for generating quantized models:
+### Post training quantized models
 
-For all post training quantized models (All quantized models except mobilenet-v2), the settings are:
+For all post training quantized models, the settings are:
 
 1. num_calibration_batches: 32
 2. num_workers: 16
@@ -162,8 +162,11 @@ For all post training quantized models (All quantized models except mobilenet-v2
 5. backend: 'fbgemm'
 
 ```
-python train_quantization.py --device='cpu' --post-training-quantize --backend='fbgemm' --model='<model_name>'
+python train_quantization.py --device='cpu' --post-training-quantize --backend='fbgemm' --model='$MODEL'
 ```
+Here `$MODEL` is one of `googlenet`, `inception_v3`, `resnet18`, `resnet50`, `resnext101_32x8d` or `shufflenet_v2_x1_0`.
+
+### QAT MobileNetV2
 
 For Mobilenet-v2, the model was trained with quantization aware training, the settings used are:
 1. num_workers: 16
@@ -185,6 +188,8 @@ torchrun --nproc_per_node=8 train_quantization.py --model='mobilenet_v2'
 
 Training converges at about 10 epochs.
 
+### QAT MobileNetV3
+
 For Mobilenet-v3 Large, the model was trained with quantization aware training, the settings used are:
 1. num_workers: 16
 2. batch_size: 32
 
@@ -157,7 +157,7 @@ def load_data(traindir, valdir, args):
                 crop_size=val_crop_size, resize_size=val_resize_size, interpolation=interpolation
             )
         else:
-            fn = PM.__dict__[args.model]
+            fn = PM.quantization.__dict__[args.model] if hasattr(args, "backend") else PM.__dict__[args.model]
             weights = PM._api.get_weight(fn, args.weights)
             preprocessing = weights.transforms()
 
 
@@ -12,6 +12,12 @@
 from train import train_one_epoch, evaluate, load_data
 
 
+try:
+    from torchvision.prototype import models as PM
+except ImportError:
+    PM = None
+
+
 def main(args):
     if args.output_dir:
         utils.mkdir(args.output_dir)
@@ -46,7 +52,12 @@ def main(args):
 
     print("Creating model", args.model)
     # when training quantized models, we always start from a pre-trained fp32 reference model
-    model = torchvision.models.quantization.__dict__[args.model](pretrained=True, quantize=args.test_only)
+    if not args.weights:
+        model = torchvision.models.quantization.__dict__[args.model](pretrained=True, quantize=args.test_only)
+    else:
+        if PM is None:
+            raise ImportError("The prototype module couldn't be found. Please install the latest torchvision nightly.")
+        model = PM.quantization.__dict__[args.model](weights=args.weights, quantize=args.test_only)
     model.to(device)
 
     if not (args.test_only or args.post_training_quantize):
@@ -251,6 +262,9 @@ def get_args_parser(add_help=True):
         "--train-crop-size", default=224, type=int, help="the random crop size used for training (default: 224)"
     )
 
+    # Prototype models only
+    parser.add_argument("--weights", default=None, type=str, help="the weights enum name to load")
+
     return parser
 
 
 
@@ -30,10 +30,15 @@ def get_models_with_module_names(module):
     return [(fn, module_name) for fn in TM.get_models_from_module(module)]
 
 
-def test_get_weight():
-    fn = models.resnet50
-    weight_name = "ImageNet1K_RefV2"
-    assert models._api.get_weight(fn, weight_name) == models.ResNet50Weights.ImageNet1K_RefV2
+@pytest.mark.parametrize(
+    "model_fn, weight",
+    [
+        (models.resnet50, models.ResNet50Weights.ImageNet1K_RefV2),
+        (models.quantization.resnet50, models.quantization.QuantizedResNet50Weights.ImageNet1K_FBGEMM_RefV1),
+    ],
+)
+def test_get_weight(model_fn, weight):
+    assert models._api.get_weight(model_fn, weight.name) == weight
 
 
 @pytest.mark.parametrize("model_fn", TM.get_models_from_module(models))
@@ -43,6 +48,12 @@ def test_classification_model(model_fn, dev):
     TM.test_classification_model(model_fn, dev)
 
 
+@pytest.mark.parametrize("model_fn", TM.get_models_from_module(models.quantization))
+@pytest.mark.skipif(os.getenv("PYTORCH_TEST_WITH_PROTOTYPE", "0") == "0", reason="Prototype code tests are disabled")
+def test_quantized_classification_model(model_fn):
+    TM.test_quantized_classification_model(model_fn)
+
+
 @pytest.mark.parametrize("model_fn", TM.get_models_from_module(models.segmentation))
 @pytest.mark.parametrize("dev", cpu_and_gpu())
 @pytest.mark.skipif(os.getenv("PYTORCH_TEST_WITH_PROTOTYPE", "0") == "0", reason="Prototype code tests are disabled")
@@ -60,6 +71,7 @@ def test_video_model(model_fn, dev):
 @pytest.mark.parametrize(
     "model_fn, module_name",
     get_models_with_module_names(models)
+    + get_models_with_module_names(models.quantization)
     + get_models_with_module_names(models.segmentation)
     + get_models_with_module_names(models.video),
 )
@@ -70,6 +82,9 @@ def test_old_vs_new_factory(model_fn, module_name, dev):
         "models": {
             "input_shape": (1, 3, 224, 224),
         },
+        "quantization": {
+            "input_shape": (1, 3, 224, 224),
+        },
         "segmentation": {
             "input_shape": (1, 3, 520, 520),
         },
 
@@ -25,7 +25,7 @@ class GeneralizedRCNN(nn.Module):
             the model
     """
 
-    def __init__(self, backbone, rpn, roi_heads, transform):
+    def __init__(self, backbone: nn.Module, rpn: nn.Module, roi_heads: nn.Module, transform: nn.Module) -> None:
         super().__init__()
         _log_api_usage_once(self)
         self.transform = transform
@@ -36,19 +36,26 @@ def __init__(self, backbone, rpn, roi_heads, transform):
         self._has_warned = False
 
     @torch.jit.unused
-    def eager_outputs(self, losses, detections):
-        # type: (Dict[str, Tensor], List[Dict[str, Tensor]]) -> Union[Dict[str, Tensor], List[Dict[str, Tensor]]]
+    def eager_outputs(
+        self,
+        losses: Dict[str, Tensor],
+        detections: List[Dict[str, Tensor]],
+    ) -> Union[Dict[str, Tensor], List[Dict[str, Tensor]]]:
+
         if self.training:
             return losses
 
         return detections
 
-    def forward(self, images, targets=None):
-        # type: (List[Tensor], Optional[List[Dict[str, Tensor]]]) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]
+    def forward(
+        self,
+        images: List[Tensor],
+        targets: Optional[List[Dict[str, Tensor]]] = None,
+    ) -> Union[Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]], Dict[str, Tensor], List[Dict[str, Tensor]]]:
         """
         Args:
             images (list[Tensor]): images to be processed
-            targets (list[Dict[Tensor]]): ground-truth boxes present in the image (optional)
+            targets (list[Dict[str, Tensor]]): ground-truth boxes present in the image (optional)
 
         Returns:
             result (list[BoxList] or dict[Tensor]): the output from the model.
@@ -97,7 +104,7 @@ def forward(self, images, targets=None):
             features = OrderedDict([("0", features)])
         proposals, proposal_losses = self.rpn(images, features, targets)
         detections, detector_losses = self.roi_heads(features, proposals, images.image_sizes, targets)
-        detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes)
+        detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes)  # type: ignore[operator]
 
         losses = {}
         losses.update(detector_losses)
 
@@ -13,6 +13,7 @@
 from .deform_conv import deform_conv2d, DeformConv2d
 from .feature_pyramid_network import FeaturePyramidNetwork
 from .focal_loss import sigmoid_focal_loss
+from .misc import FrozenBatchNorm2d, ConvNormActivation, SqueezeExcitation
 from .poolers import MultiScaleRoIAlign
 from .ps_roi_align import ps_roi_align, PSRoIAlign
 from .ps_roi_pool import ps_roi_pool, PSRoIPool
@@ -48,4 +49,7 @@
     "sigmoid_focal_loss",
     "stochastic_depth",
     "StochasticDepth",
+    "FrozenBatchNorm2d",
+    "ConvNormActivation",
+    "SqueezeExcitation",
 ]
@@ -1,13 +1,3 @@
-"""
-helper class that supports empty tensors on some nn functions.
-
-Ideally, add support directly in PyTorch to empty tensors in
-those functions.
-
-This can be removed once https://github.com/pytorch/pytorch/issues/12013
-is implemented
-"""
-
 import warnings
 from typing import Callable, List, Optional
 
@@ -53,8 +43,11 @@ def __init__(self, *args, **kwargs):
 # This is not in nn
 class FrozenBatchNorm2d(torch.nn.Module):
     """
-    BatchNorm2d where the batch statistics and the affine parameters
-    are fixed
+    BatchNorm2d where the batch statistics and the affine parameters are fixed
+
+    Args:
+        num_features (int): Number of features ``C`` from an expected input of size ``(N, C, H, W)``
+        eps (float): a value added to the denominator for numerical stability. Default: 1e-5
     """
 
     def __init__(
@@ -109,6 +102,23 @@ def __repr__(self) -> str:
 
 
 class ConvNormActivation(torch.nn.Sequential):
+    """
+    Configurable block used for Convolution-Normalization-Activation blocks.
+
+    Args:
+        in_channels (int): Number of channels in the input image
+        out_channels (int): Number of channels produced by the Convolution-Normalization-Activation block
+        kernel_size (int, optional): Size of the convolving kernel. Default: 3
+        stride (int, optional): Stride of the convolution. Default: 1
+        padding (int, tuple or str, optional): Padding added to all four sides of the input. Default: None, in which case it will be calculated as ``padding = (kernel_size - 1) // 2 * dilation``
+        groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
+        norm_layer (Callable[..., torch.nn.Module], optional): Norm layer that will be stacked on top of the convolution layer. If ``None`` this layer won't be used. Default: ``torch.nn.BatchNorm2d``
+        activation_layer (Callable[..., torch.nn.Module], optional): Activation function which will be stacked on top of the normalization layer (if not None), otherwise on top of the conv layer. If ``None`` this layer won't be used. Default: ``torch.nn.ReLU``
+        dilation (int): Spacing between kernel elements. Default: 1
+        inplace (bool): Parameter for the activation layer, which can optionally do the operation in-place. Default: ``True``
+
+    """
+
     def __init__(
         self,
         in_channels: int,
@@ -146,6 +156,17 @@ def __init__(
 
 
 class SqueezeExcitation(torch.nn.Module):
+    """
+    This block implements the Squeeze-and-Excitation block from https://arxiv.org/abs/1709.01507 (see Fig. 1).
+    Parameters ``activation`` and ``scale_activation`` correspond to ``delta`` and ``sigma`` in eq. 3.
+
+    Args:
+        input_channels (int): Number of channels in the input image
+        squeeze_channels (int): Number of squeeze channels
+        activation (Callable[..., torch.nn.Module], optional): ``delta`` activation. Default: ``torch.nn.ReLU``
+        scale_activation (Callable[..., torch.nn.Module]): ``sigma`` activation. Default: ``torch.nn.Sigmoid``
+    """
+
     def __init__(
         self,
         input_channels: int,
Original file line number	Diff line number	Diff line change
`@@ -157,7 +157,7 @@ def load_data(traindir, valdir, args):`
`157`	`157`	`crop_size=val_crop_size, resize_size=val_resize_size, interpolation=interpolation`
`158`	`158`	`)`
`159`	`159`	`else:`
`160`		`- fn = PM.__dict__[args.model]`
	`160`	`+ fn = PM.quantization.__dict__[args.model] if hasattr(args, "backend") else PM.__dict__[args.model]`
`161`	`161`	`weights = PM._api.get_weight(fn, args.weights)`
`162`	`162`	`preprocessing = weights.transforms()`
`163`	`163`