lyuwenyu · lyuwenyu · Sep 23, 2024 · Sep 23, 2024 · Sep 23, 2024
diff --git a/rtdetr_pytorch/README.md b/rtdetr_pytorch/README.md
@@ -25,6 +25,8 @@ rtdetr_r101vd | COCO | 640 | 54.3 | 72.8 | 76 | 74 | [url<sup>*</sup>](https://g
 rtdetr_18vd | COCO+Objects365 | 640 | 49.0 | 66.5 | 20 | 217 | [url<sup>*</sup>](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r18vd_5x_coco_objects365_from_paddle.pth)
 rtdetr_r50vd | COCO+Objects365 | 640 | 55.2 | 73.4 | 42 | 108 | [url<sup>*</sup>](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_2x_coco_objects365_from_paddle.pth)
 rtdetr_r101vd | COCO+Objects365 | 640 | 56.2 | 74.5 | 76 | 74 | [url<sup>*</sup>](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r101vd_2x_coco_objects365_from_paddle.pth)
+rtdetr_regnet | COCO | 640 | 51.6 | 69.6 | 38 | 67 | [url<sup>*</sup>](https://drive.google.com/file/d/1K2EXJgnaEUJcZCLULHrZ492EF4PdgVp9/view?usp=sharing)
+rtdetr_dla34 | COCO | 640 | 49.6 | 67.4 | 34 | 83 | [url<sup>*</sup>](https://drive.google.com/file/d/1_rVpl-jIelwy2LDT3E4vdM4KCLBcOtzZ/view?usp=sharing)
 
 Notes
 - `COCO + Objects365` in the table means finetuned model on `COCO` using pretrained weights trained on `Objects365`.

diff --git a/rtdetr_pytorch/configs/rtdetr/include/dataloader_regnet.yml b/rtdetr_pytorch/configs/rtdetr/include/dataloader_regnet.yml
@@ -0,0 +1,39 @@
+# num_classes: 91
+# remap_mscoco_category: True
+
+train_dataloader:  # training split: augmentation pipeline plus loader settings
+  dataset:
+    return_masks: False
+    transforms:
+      ops:  # random augmentations, then fixed resize, tensor conversion and box formatting
+        - {type: RandomPhotometricDistort, p: 0.5}
+        - {type: RandomZoomOut, fill: 0}
+        - {type: RandomIoUCrop, p: 0.8}
+        - {type: SanitizeBoundingBox, min_size: 1}  # first box sanitize, directly after the crop
+        - {type: RandomHorizontalFlip}
+        - {type: Resize, size: [640, 640]}  # fixed 640x640 output size
+        # - {type: Resize, size: 639, max_size: 640}
+        # - {type: PadToSize, spatial_size: 640}
+        - {type: ToImageTensor}
+        - {type: ConvertDtype}
+        - {type: SanitizeBoundingBox, min_size: 1}  # second sanitize, after the resize and tensor conversion
+        - {type: ConvertBox, out_fmt: 'cxcywh', normalize: True}  # boxes leave as normalized cxcywh
+  shuffle: True
+  batch_size: 8
+  num_workers: 2
+  collate_fn: default_collate_fn
+
+
+val_dataloader:  # validation split: only resize and tensor conversion, no random augmentation ops
+  dataset:
+    transforms:
+      ops:
+        # - {type: Resize, size: 639, max_size: 640}
+        # - {type: PadToSize, spatial_size: 640}
+        - {type: Resize, size: [640, 640]}  # same fixed 640x640 size as the training Resize
+        - {type: ToImageTensor}
+        - {type: ConvertDtype}
+  shuffle: False
+  batch_size: 8
+  num_workers: 2
+  collate_fn: default_collate_fn
diff --git a/rtdetr_pytorch/configs/rtdetr/include/optimizer_regnet.yml b/rtdetr_pytorch/configs/rtdetr/include/optimizer_regnet.yml
@@ -0,0 +1,33 @@
+
+use_ema: True  # presumably toggles use of the `ema` settings below — confirm in the trainer
+ema:
+  type: ModelEMA
+  decay: 0.9999
+  warmups: 2000
+
+
+find_unused_parameters: True  # NOTE(review): looks like the DistributedDataParallel flag of the same name — confirm
+
+epoches: 72  # note the key is spelled `epoches`; do not rename without checking the consumer
+clip_max_norm: 0.1
+
+optimizer:
+  type: AdamW
+  params:  # per-group overrides; each inner `params` is a regex, presumably matched against parameter names
+    -
+      params: '^(?=.*encoder(?=.*bias|.*norm.*weight)).*$'  # `encoder` followed by `bias` or `norm...weight`
+      weight_decay: 0.  # no weight decay for this group
+    -
+      params: '^(?=.*decoder(?=.*bias|.*norm.*weight)).*$'  # same pattern, for `decoder`
+      weight_decay: 0.
+
+  lr: 0.0001
+  betas: [0.9, 0.999]
+  weight_decay: 0.0001  # top-level value; presumably the default for parameters outside the groups above
+
+
+lr_scheduler:
+  type: MultiStepLR
+  milestones: [1000]  # NOTE(review): far beyond `epoches: 72`; if stepped per epoch the 0.1 decay never triggers — confirm intended
+  gamma: 0.1
+
diff --git a/rtdetr_pytorch/configs/rtdetr/include/rtdetr_dla34.yml b/rtdetr_pytorch/configs/rtdetr/include/rtdetr_dla34.yml
@@ -0,0 +1,78 @@
+task: detection
+
+model: RTDETR  # model/criterion/postprocessor each name a top-level section defined in this file
+criterion: SetCriterion
+postprocessor: RTDETRPostProcessor
+
+
+RTDETR:
+  backbone: DLANet  # settings come from the `DLANet` section below
+  encoder: HybridEncoder
+  decoder: RTDETRTransformer
+  multi_scale: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]  # 640 is listed three times; presumably weights size selection toward 640 — confirm
+
+DLANet:
+  dla: dla34
+  pretrained: True
+  return_idx: [1, 2, 3]  # three indices, matching the three entries of HybridEncoder.in_channels
+
+
+HybridEncoder:
+  in_channels: [128, 256, 512]  # one entry per index in DLANet.return_idx
+  feat_strides: [8, 16, 32]  # same list as RTDETRTransformer.feat_strides
+
+  # intra: transformer-encoder settings (presumably intra-scale attention — confirm in HybridEncoder)
+  hidden_dim: 256
+  use_encoder_idx: [2]  # presumably indexes the levels above; index 2 is the last one (stride 32)
+  num_encoder_layers: 1
+  nhead: 8
+  dim_feedforward: 1024
+  dropout: 0.
+  enc_act: 'gelu'
+  pe_temperature: 10000
+
+  # cross: presumably the cross-scale fusion settings — confirm in HybridEncoder
+  expansion: 1.0
+  depth_mult: 1
+  act: 'silu'
+
+  # eval: fixed spatial size, same value as RTDETRTransformer.eval_spatial_size
+  eval_spatial_size: [640, 640]
+
+
+RTDETRTransformer:
+  feat_channels: [256, 256, 256]  # every entry equals HybridEncoder.hidden_dim (256)
+  feat_strides: [8, 16, 32]
+  hidden_dim: 256
+  num_levels: 3  # equals the length of feat_channels and feat_strides
+
+  num_queries: 300
+
+  num_decoder_layers: 6
+  num_denoising: 100
+
+  eval_idx: -1  # presumably selects the last decoder layer's output at eval time — confirm
+  eval_spatial_size: [640, 640]
+
+
+use_focal_loss: True  # top-level flag; the matcher-level copy below is commented out
+
+RTDETRPostProcessor:
+  num_top_queries: 300  # same value as RTDETRTransformer.num_queries
+
+
+SetCriterion:
+  weight_dict: {loss_vfl: 1, loss_bbox: 5, loss_giou: 2}
+  losses: ['vfl', 'boxes']  # NOTE(review): `loss_giou` is weighted above but not listed here; presumably 'boxes' yields both bbox and giou terms — confirm
+  alpha: 0.75
+  gamma: 2.0
+
+  matcher:
+    type: HungarianMatcher
+    weight_dict: {cost_class: 2, cost_bbox: 5, cost_giou: 2}
+    # use_focal_loss: True
+    alpha: 0.25  # differs from the criterion-level alpha (0.75)
+    gamma: 2.0
+
+
+
diff --git a/rtdetr_pytorch/configs/rtdetr/include/rtdetr_regnet.yml b/rtdetr_pytorch/configs/rtdetr/include/rtdetr_regnet.yml
@@ -0,0 +1,77 @@
+task: detection
+
+model: RTDETR  # model/criterion/postprocessor each name a top-level section defined in this file
+criterion: SetCriterion
+postprocessor: RTDETRPostProcessor
+
+
+RTDETR:
+  backbone: RegNet  # settings come from the `RegNet` section below
+  encoder: HybridEncoder
+  decoder: RTDETRTransformer
+  multi_scale: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]  # 640 is listed three times; presumably weights size selection toward 640 — confirm
+
+
+RegNet:
+  return_idx: [1, 2, 3]  # three indices, matching the three entries of HybridEncoder.in_channels
+  configuration: RegNetConfig()  # NOTE(review): YAML reads this as the plain string "RegNetConfig()", not an object — confirm the RegNet backbone handles that
+
+HybridEncoder:
+  in_channels: [192, 512, 1088]  # one entry per index in RegNet.return_idx
+  feat_strides: [8, 16, 32]  # same list as RTDETRTransformer.feat_strides
+
+  # intra: transformer-encoder settings (presumably intra-scale attention — confirm in HybridEncoder)
+  hidden_dim: 256
+  use_encoder_idx: [2]  # presumably indexes the levels above; index 2 is the last one (stride 32)
+  num_encoder_layers: 1
+  nhead: 8
+  dim_feedforward: 1024
+  dropout: 0.
+  enc_act: 'gelu'
+  pe_temperature: 10000
+
+  # cross: presumably the cross-scale fusion settings — confirm in HybridEncoder
+  expansion: 1.0
+  depth_mult: 1
+  act: 'silu'
+
+  # eval: fixed spatial size, same value as RTDETRTransformer.eval_spatial_size
+  eval_spatial_size: [640, 640]
+
+
+RTDETRTransformer:
+  feat_channels: [256, 256, 256]  # every entry equals HybridEncoder.hidden_dim (256)
+  feat_strides: [8, 16, 32]
+  hidden_dim: 256
+  num_levels: 3  # equals the length of feat_channels and feat_strides
+
+  num_queries: 300
+
+  num_decoder_layers: 6
+  num_denoising: 100
+
+  eval_idx: -1  # presumably selects the last decoder layer's output at eval time — confirm
+  eval_spatial_size: [640, 640]
+
+
+use_focal_loss: True  # top-level flag; the matcher-level copy below is commented out
+
+RTDETRPostProcessor:
+  num_top_queries: 300  # same value as RTDETRTransformer.num_queries
+
+
+SetCriterion:
+  weight_dict: {loss_vfl: 1, loss_bbox: 5, loss_giou: 2}
+  losses: ['vfl', 'boxes']  # NOTE(review): `loss_giou` is weighted above but not listed here; presumably 'boxes' yields both bbox and giou terms — confirm
+  alpha: 0.75
+  gamma: 2.0
+
+  matcher:
+    type: HungarianMatcher
+    weight_dict: {cost_class: 2, cost_bbox: 5, cost_giou: 2}
+    # use_focal_loss: True
+    alpha: 0.25  # differs from the criterion-level alpha (0.75)
+    gamma: 2.0
+
+
+
diff --git a/rtdetr_pytorch/configs/rtdetr/rtdetr_dla34_6x_coco.yml b/rtdetr_pytorch/configs/rtdetr/rtdetr_dla34_6x_coco.yml
@@ -0,0 +1,9 @@
+# Composes the COCO dataset, runtime, shared dataloader/optimizer and DLA-34 model configs.
+__include__:
+  - '../dataset/coco_detection.yml'
+  - '../runtime.yml'
+  - './include/dataloader.yml'
+  - './include/optimizer.yml'
+  - './include/rtdetr_dla34.yml'
+
+output_dir: ./output/rtdetr_dla34_6x_coco
diff --git a/rtdetr_pytorch/configs/rtdetr/rtdetr_regnet_6x_coco.yml b/rtdetr_pytorch/configs/rtdetr/rtdetr_regnet_6x_coco.yml
@@ -0,0 +1,9 @@
+# Composes the COCO dataset, runtime and the RegNet-specific dataloader/optimizer/model configs.
+__include__:
+  - '../dataset/coco_detection.yml'
+  - '../runtime.yml'
+  - './include/dataloader_regnet.yml'
+  - './include/optimizer_regnet.yml'
+  - './include/rtdetr_regnet.yml'
+
+output_dir: ./output/rtdetr_regnet_6x_coco
diff --git a/rtdetr_pytorch/src/nn/backbone/__init__.py b/rtdetr_pytorch/src/nn/backbone/__init__.py
@@ -1,5 +1,6 @@
 
 from .presnet import *
 from .test_resnet import *
-
-from .common import *
+from .regnet import *
+from .common import *
+from .dla import *