Add timm backbone integration #340

Open
wants to merge 8 commits into base: main
1 change: 1 addition & 0 deletions rtdetr_pytorch/configs/rtdetr/rtdetr_r18vd_6x_coco.yml
@@ -20,6 +20,7 @@ HybridEncoder:
  in_channels: [128, 256, 512]
  hidden_dim: 256
  expansion: 0.5
  depth_mult: 1.0


RTDETRTransformer:
52 changes: 52 additions & 0 deletions rtdetr_pytorch/configs/rtdetr/rtdetr_timm_mobilenetv3_coco.yml
@@ -0,0 +1,52 @@

__include__: [
  '../dataset/coco_detection.yml',
  '../runtime.yml',
  './include/dataloader.yml',
  './include/optimizer.yml',
  './include/rtdetr_r50vd.yml',
]


output_dir: ./output/rtdetr_timm_mobilenetv3_coco

RTDETR:
  backbone: Timm

Timm:
  model_type: mobilenetv3_small_050.lamb_in1k

HybridEncoder:
  in_channels: [16, 24, 288]
  hidden_dim: 256
  expansion: 0.5
  depth_mult: 1.0
  # eval_spatial_size: [640, 640]


RTDETRTransformer:
  eval_idx: -1
  num_decoder_layers: 3
  num_denoising: 100
  eval_spatial_size: [640, 640]


optimizer:
  type: AdamW
  params:
    -
      params: '^(?=.*backbone)(?=.*norm).*$'
      lr: 0.00001
      weight_decay: 0.
    -
      params: '^(?=.*backbone)(?!.*norm).*$'
      lr: 0.00001
    -
      params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bias)).*$'
      weight_decay: 0.

  lr: 0.0001
  betas: [0.9, 0.999]
  weight_decay: 0.0001
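The `in_channels` above must match the channel counts of the feature maps the Timm backbone forwards to the HybridEncoder. A quick way to look those numbers up for any timm model is its `feature_info`; a minimal sketch (not part of this PR), assuming the last three feature maps are the ones fed to the encoder, as the wrapper added below does:

# Sketch: query the per-stage channels of a timm backbone to fill in
# HybridEncoder.in_channels. pretrained=False avoids a weight download.
import timm

m = timm.create_model('mobilenetv3_small_050.lamb_in1k', features_only=True, pretrained=False)
print(m.feature_info.channels())       # channel count of every returned feature map
print(m.feature_info.channels()[-3:])  # the three deepest stages -> in_channels above
print(m.feature_info.reduction())      # strides of each map, e.g. [2, 4, 8, 16, 32]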

52 changes: 52 additions & 0 deletions rtdetr_pytorch/configs/rtdetr/rtdetr_timm_resnet18_coco.yml
@@ -0,0 +1,52 @@

__include__: [
  '../dataset/coco_detection.yml',
  '../runtime.yml',
  './include/dataloader.yml',
  './include/optimizer.yml',
  './include/rtdetr_r50vd.yml',
]


output_dir: ./output/rtdetr_timm_resnet18_coco

RTDETR:
  backbone: Timm

Timm:
  model_type: resnet18.fb_swsl_ig1b_ft_in1k

HybridEncoder:
  in_channels: [128, 256, 512]  # channels of resnet18's last three feature maps (strides 8/16/32)
  hidden_dim: 256
  expansion: 0.5
  depth_mult: 1.0
  eval_spatial_size: [640, 640]


RTDETRTransformer:
  eval_idx: -1
  num_decoder_layers: 3
  num_denoising: 100
  # eval_spatial_size: [640, 640]


optimizer:
  type: AdamW
  params:
    -
      params: '^(?=.*backbone)(?=.*norm).*$'
      lr: 0.00001
      weight_decay: 0.
    -
      params: '^(?=.*backbone)(?!.*norm).*$'
      lr: 0.00001
    -
      params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bias)).*$'
      weight_decay: 0.

  lr: 0.0001
  betas: [0.9, 0.999]
  weight_decay: 0.0001
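Before a full COCO run, a cheap smoke test is to build the model from this config and push one dummy batch through it; any mismatch between the backbone's output channels and `in_channels` surfaces immediately as a shape error. A minimal sketch (not part of this PR), assuming it is run from `rtdetr_pytorch/` and that `YAMLConfig` is used the same way as in `tools/export_onnx.py`:

# Sketch: config smoke test. Note that building the model downloads the
# pretrained timm weights on first use.
import torch
from src.core import YAMLConfig

cfg = YAMLConfig('configs/rtdetr/rtdetr_timm_resnet18_coco.yml')
model = cfg.model
model.eval()
with torch.no_grad():
    out = model(torch.rand(1, 3, 640, 640))
# expect a dict with 'pred_logits' and 'pred_boxes' if the channel wiring is correct
print({k: tuple(v.shape) for k, v in out.items() if torch.is_tensor(v)})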

2 changes: 1 addition & 1 deletion rtdetr_pytorch/src/nn/backbone/__init__.py
@@ -1,5 +1,5 @@

from .presnet import *
from .test_resnet import *

from .timm import *
from .common import *
25 changes: 25 additions & 0 deletions rtdetr_pytorch/src/nn/backbone/timm.py
@@ -0,0 +1,25 @@
from src.core import register
import timm
import torch.nn as nn

__all__ = ['Timm']


@register
class Timm(nn.Module):
    """Wraps any timm model built with features_only=True as an RT-DETR backbone."""

    def __init__(
        self,
        model_type: str = 'mobilenetv3_small_050.lamb_in1k'
    ):
        super().__init__()
        self.model = timm.create_model(model_type, features_only=True, pretrained=True)

    def forward(self, x):
        # timm returns one feature map per stage; drop the two highest-resolution
        # maps and pass the deeper ones (strides 8/16/32 for the 5-stage models
        # used in the configs) on to the HybridEncoder.
        feats = self.model(x)
        return feats[2:]
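A quick usage check of the wrapper (not part of this PR): instantiate it with one of the model names from the configs above and print the shapes it returns for a 640x640 input; the channel dimensions are what `in_channels` has to match.

# Sketch: inspect what the Timm wrapper emits. Downloads pretrained weights on first use.
import torch
from src.nn.backbone import Timm

backbone = Timm(model_type='resnet18.fb_swsl_ig1b_ft_in1k')
feats = backbone(torch.rand(1, 3, 640, 640))
print([tuple(f.shape) for f in feats])  # expect strides 8/16/32 with 128/256/512 channels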
18 changes: 9 additions & 9 deletions rtdetr_pytorch/tools/export_onnx.py
@@ -11,7 +11,7 @@
from src.core import YAMLConfig

import torch
import torch.nn as nn


def main(args, ):
@@ -20,7 +20,7 @@ def main(args, ):
    cfg = YAMLConfig(args.config, resume=args.resume)

    if args.resume:
        checkpoint = torch.load(args.resume, map_location='cpu')
        if 'ema' in checkpoint:
            state = checkpoint['ema']['module']
        else:
@@ -37,11 +37,11 @@ def __init__(self, ) -> None:
            self.model = cfg.model.deploy()
            self.postprocessor = cfg.postprocessor.deploy()
            print(self.postprocessor.deploy_mode)

        def forward(self, images, orig_target_sizes):
            outputs = self.model(images)
            return self.postprocessor(outputs, orig_target_sizes)


    model = Model()

@@ -54,13 +54,13 @@ def forward(self, images, orig_target_sizes):
    size = torch.tensor([[640, 640]])

    torch.onnx.export(
        model,
        (data, size),
        args.file_name,
        input_names=['images', 'orig_target_sizes'],
        output_names=['labels', 'boxes', 'scores'],
        dynamic_axes=dynamic_axes,
        opset_version=16,
        verbose=False
    )

@@ -74,14 +74,14 @@ def forward(self, images, orig_target_sizes):

    if args.simplify:
        import onnxsim
        dynamic = True
        input_shapes = {'images': data.shape, 'orig_target_sizes': size.shape} if dynamic else None
        onnx_model_simplify, check = onnxsim.simplify(args.file_name, input_shapes=input_shapes, dynamic_input_shape=dynamic)
        onnx.save(onnx_model_simplify, args.file_name)
        print(f'Simplify onnx model {check}...')


    # import onnxruntime as ort
    # from PIL import Image, ImageDraw, ImageFont
    # from torchvision.transforms import ToTensor
    # from src.data.coco.coco_dataset import mscoco_category2name, mscoco_category2label, mscoco_label2category
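The commented-out imports above hint at a runtime check of the exported model. A minimal sketch with ONNX Runtime (not part of this PR), assuming the script's default `model.onnx` file name and the input/output names used in the `torch.onnx.export` call above:

# Sketch: run the exported RT-DETR model with ONNX Runtime on a random input.
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession('model.onnx', providers=['CPUExecutionProvider'])
images = np.random.rand(1, 3, 640, 640).astype(np.float32)
orig_sizes = np.array([[640, 640]], dtype=np.int64)
labels, boxes, scores = sess.run(
    output_names=['labels', 'boxes', 'scores'],
    input_feed={'images': images, 'orig_target_sizes': orig_sizes},
)
print(labels.shape, boxes.shape, scores.shape)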