open-mmlab · hhaAndroid · Feb 8, 2023 · Feb 6, 2023 · Feb 6, 2023 · Feb 6, 2023
diff --git a/configs/yolox/yolox_l_8xb8-300e_coco.py → configs/yolox/yolox_l_fast_8xb8-300e_coco.py b/configs/yolox/yolox_l_8xb8-300e_coco.py → configs/yolox/yolox_l_fast_8xb8-300e_coco.py
@@ -1,4 +1,4 @@
-_base_ = './yolox_s_8xb8-300e_coco.py'
+_base_ = './yolox_s_fast_8xb8-300e_coco.py'
 
 deepen_factor = 1.0
 widen_factor = 1.0

diff --git a/configs/yolox/yolox_m_8xb8-300e_coco.py → configs/yolox/yolox_m_fast_8xb8-300e_coco.py b/configs/yolox/yolox_m_8xb8-300e_coco.py → configs/yolox/yolox_m_fast_8xb8-300e_coco.py
@@ -1,4 +1,4 @@
-_base_ = './yolox_s_8xb8-300e_coco.py'
+_base_ = './yolox_s_fast_8xb8-300e_coco.py'
 
 deepen_factor = 0.67
 widen_factor = 0.75

diff --git a/configs/yolox/yolox_nano_8xb8-300e_coco.py → ...s/yolox/yolox_nano_fast_8xb8-300e_coco.py b/configs/yolox/yolox_nano_8xb8-300e_coco.py → ...s/yolox/yolox_nano_fast_8xb8-300e_coco.py
@@ -1,4 +1,4 @@
-_base_ = './yolox_tiny_8xb8-300e_coco.py'
+_base_ = './yolox_tiny_fast_8xb8-300e_coco.py'
 
 deepen_factor = 0.33
 widen_factor = 0.25

diff --git a/configs/yolox/yolox_s_8xb8-300e_coco.py → configs/yolox/yolox_s_fast_8xb8-300e_coco.py b/configs/yolox/yolox_s_8xb8-300e_coco.py → configs/yolox/yolox_s_fast_8xb8-300e_coco.py
@@ -13,6 +13,8 @@
 val_batch_size_per_gpu = 1
 val_num_workers = 2
 
+persistent_workers = False
+
 max_epochs = 300
 num_last_epochs = 15
 
@@ -29,11 +31,11 @@
     # TODO: Waiting for mmengine support
     use_syncbn=False,
     data_preprocessor=dict(
-        type='mmdet.DetDataPreprocessor',
+        type='YOLOv5DetDataPreprocessor',
         pad_size_divisor=32,
         batch_augments=[
             dict(
-                type='mmdet.BatchSyncRandomResize',
+                type='BatchSyncRandomResize',
                 random_size_range=(480, 800),
                 size_divisor=32,
                 interval=10)
@@ -155,8 +157,9 @@
 train_dataloader = dict(
     batch_size=train_batch_size_per_gpu,
     num_workers=train_num_workers,
-    persistent_workers=True,
+    persistent_workers=persistent_workers,
     pin_memory=True,
+    collate_fn=dict(type='yolov5_collate'),
     sampler=dict(type='DefaultSampler', shuffle=True),
     dataset=dict(
         type=dataset_type,
@@ -183,7 +186,7 @@
 val_dataloader = dict(
     batch_size=val_batch_size_per_gpu,
     num_workers=val_num_workers,
-    persistent_workers=True,
+    persistent_workers=persistent_workers,
     pin_memory=True,
     drop_last=False,
     sampler=dict(type='DefaultSampler', shuffle=False),

diff --git a/configs/yolox/yolox_tiny_8xb8-300e_coco.py → ...s/yolox/yolox_tiny_fast_8xb8-300e_coco.py b/configs/yolox/yolox_tiny_8xb8-300e_coco.py → ...s/yolox/yolox_tiny_fast_8xb8-300e_coco.py
@@ -1,4 +1,4 @@
-_base_ = './yolox_s_8xb8-300e_coco.py'
+_base_ = './yolox_s_fast_8xb8-300e_coco.py'
 
 deepen_factor = 0.33
 widen_factor = 0.375

diff --git a/configs/yolox/yolox_x_8xb8-300e_coco.py → configs/yolox/yolox_x_fast_8xb8-300e_coco.py b/configs/yolox/yolox_x_8xb8-300e_coco.py → configs/yolox/yolox_x_fast_8xb8-300e_coco.py
@@ -1,4 +1,4 @@
-_base_ = './yolox_s_8xb8-300e_coco.py'
+_base_ = './yolox_s_fast_8xb8-300e_coco.py'
 
 deepen_factor = 1.33
 widen_factor = 1.25

diff --git a/mmyolo/models/data_preprocessors/data_preprocessor.py b/mmyolo/models/data_preprocessors/data_preprocessor.py
@@ -3,9 +3,13 @@
 from typing import List, Mapping, Sequence, Tuple, Union
 
 import torch
+import torch.nn as nn
 import torch.nn.functional as F
+from mmengine.dist import barrier, broadcast, get_dist_info
 from mmdet.models import BatchSyncRandomResize
 from mmdet.models.data_preprocessors import DetDataPreprocessor
+from mmdet.structures import DetDataSample
+
 from mmengine import MessageHub, is_list_of
 from mmengine.structures import BaseDataElement
 from torch import Tensor
@@ -16,6 +20,77 @@
                  None]
 
 
+@MODELS.register_module()
+class BatchSyncRandomResize(nn.Module):
+    """Batch random resize which synchronizes the random size across ranks.
+
+    A new target size is drawn on rank 0 every ``interval`` iterations and
+    broadcast, so that all ranks resize their batch to the same shape.
+
+    Args:
+        random_size_range (tuple): The multi-scale random range during
+            multi-scale training.
+        interval (int): The iter interval of change
+            image size. Defaults to 10.
+        size_divisor (int): Image size divisible factor.
+            Defaults to 32.
+    """
+
+    def __init__(self,
+                 random_size_range: Tuple[int, int],
+                 interval: int = 10,
+                 size_divisor: int = 32) -> None:
+        super().__init__()
+        self.rank, self.world_size = get_dist_info()
+        # Target (h, w); lazily set from the first batch seen by ``forward``.
+        self._input_size = None
+        # The range is stored in units of ``size_divisor`` so that a sampled
+        # value multiplied by ``size_divisor`` is always divisible by it.
+        self._random_size_range = (round(random_size_range[0] / size_divisor),
+                                   round(random_size_range[1] / size_divisor))
+        self._interval = interval
+        self._size_divisor = size_divisor
+
+    def forward(self, inputs: Tensor,
+                data_samples: dict) -> Tuple[Tensor, dict]:
+        """Resize a batch of images and bboxes to ``self._input_size``.
+
+        Args:
+            inputs (Tensor): Batched images; the last two dimensions are
+                read as (h, w).
+            data_samples (dict): Batch annotations. Only the
+                ``'bboxes_labels'`` entry is read; it is rescaled in place.
+
+        Returns:
+            tuple: The (possibly resized) float images and ``data_samples``.
+        """
+        h, w = inputs.shape[-2:]
+        inputs = inputs.float()
+        if self._input_size is None:
+            self._input_size = (h, w)
+        scale_y = self._input_size[0] / h
+        scale_x = self._input_size[1] / w
+        if scale_x != 1 or scale_y != 1:
+            inputs = F.interpolate(
+                inputs,
+                size=self._input_size,
+                mode='bilinear',
+                align_corners=False)
+            # Rescale all boxes with one slice assignment instead of a
+            # Python loop over rows: columns 2, 4, ... are multiplied by
+            # scale_x and columns 3, 5, ... by scale_y.
+            # NOTE(review): presumably each row is
+            # (batch_idx, label, x1, y1, x2, y2) - confirm against the
+            # collate function that builds 'bboxes_labels'.
+            bboxes_labels = data_samples['bboxes_labels']
+            bboxes_labels[:, 2::2] = bboxes_labels[:, 2::2] * scale_x
+            bboxes_labels[:, 3::2] = bboxes_labels[:, 3::2] * scale_y
+
+        # The size drawn here only takes effect from the next call on.
+        message_hub = MessageHub.get_current_instance()
+        if (message_hub.get_info('iter') + 1) % self._interval == 0:
+            self._input_size = self._get_random_size(
+                aspect_ratio=w / h, device=inputs.device)
+
+        return inputs, data_samples
+
+    def _get_random_size(self, aspect_ratio: float,
+                         device: torch.device) -> Tuple[int, int]:
+        """Randomly generate a shape in ``_random_size_range`` and broadcast to
+        all ranks.
+
+        Args:
+            aspect_ratio (float): Width / height ratio of the current batch.
+            device (torch.device): Device on which the broadcast buffer is
+                allocated.
+
+        Returns:
+            tuple: The new input size as (h, w).
+        """
+        # Zero-initialised directly on ``device``; only rank 0 fills it in,
+        # the other ranks receive the values through ``broadcast``.
+        tensor = torch.zeros(2, dtype=torch.long, device=device)
+        if self.rank == 0:
+            size = random.randint(*self._random_size_range)
+            size = (self._size_divisor * size,
+                    self._size_divisor * int(aspect_ratio * size))
+            tensor[0] = size[0]
+            tensor[1] = size[1]
+        barrier()
+        broadcast(tensor, 0)
+        input_size = (tensor[0].item(), tensor[1].item())
+        return input_size
+
+
 @MODELS.register_module()
 class YOLOv5DetDataPreprocessor(DetDataPreprocessor):
     """Rewrite collate_fn to get faster training speed.
@@ -74,7 +149,7 @@ def forward(self, data: dict, training: bool = False) -> dict:
         if self.batch_augments is not None:
             for batch_aug in self.batch_augments:
                 inputs, data_samples = batch_aug(inputs, data_samples)
-
+        
         img_metas = [{'batch_input_shape': inputs.shape[2:]}] * len(inputs)
         data_samples = {
             'bboxes_labels': data_samples['bboxes_labels'],

diff --git a/mmyolo/models/dense_heads/yolox_head.py b/mmyolo/models/dense_heads/yolox_head.py
@@ -296,7 +296,9 @@ def loss_by_feat(
         num_imgs = len(batch_img_metas)
         if batch_gt_instances_ignore is None:
             batch_gt_instances_ignore = [None] * num_imgs
-
+
+        batch_gt_instances = self.gt_instances_preprocess(batch_gt_instances, len(batch_img_metas))
+
         featmap_sizes = [cls_score.shape[2:] for cls_score in cls_scores]
         mlvl_priors = self.prior_generator.grid_priors(
             featmap_sizes,
@@ -484,3 +486,30 @@ def _get_bbox_aux_target(self,
         bbox_aux_target[:,
                         2:] = torch.log(gt_cxcywh[:, 2:] / priors[:, 2:] + eps)
         return bbox_aux_target
+
+    @staticmethod
+    def gt_instances_preprocess(batch_gt_instances: Union[Tensor, Sequence],
+                                batch_size: int) -> List[InstanceData]:
+        """Split batch_gt_instances into one ``InstanceData`` per image.
+
+        Args:
+            batch_gt_instances (Tensor): Ground truth instances for the
+                whole batch, shape [all_gt_bboxes, 6]. Column 0 is matched
+                against the image index, column 1 is stored as ``labels``
+                and columns 2 onwards are stored as ``bboxes``.
+            batch_size (int): Batch size.
+
+        Returns:
+            List[InstanceData]: ``batch_size`` entries, one per image. An
+            image without matching rows gets empty ``bboxes``/``labels``.
+        """
+        # faster version
+        # split batch gt instance [all_gt_bboxes, 6] -> [InstanceData]
+        # The image-index column is loop-invariant, so slice it only once.
+        img_indices = batch_gt_instances[:, 0]
+        batch_instance_list = []
+        for i in range(batch_size):
+            batch_gt_instance_ = InstanceData()
+            single_batch_instance = batch_gt_instances[img_indices == i, :]
+            batch_gt_instance_.bboxes = single_batch_instance[:, 2:]
+            batch_gt_instance_.labels = single_batch_instance[:, 1]
+            batch_instance_list.append(batch_gt_instance_)
+
+        return batch_instance_list