From 9c86618b8b22d6da1dddfc525e7982e522192b5a Mon Sep 17 00:00:00 2001 From: luochunhua Date: Mon, 23 May 2022 13:12:01 +0000 Subject: [PATCH 01/11] rename --- .../mask2former_r101_lsj_8x2_50e_coco.py | 7 -- .../mask2former_r50_lsj_8x2_50e_coco.py | 79 ------------------- ...n-s-p4-w7-224_lsj_8x2_50e_coco-panoptic.py | 2 +- ...ormer_swin-s-p4-w7-224_lsj_8x2_50e_coco.py | 37 --------- ...ormer_swin-t-p4-w7-224_lsj_8x2_50e_coco.py | 61 -------------- 5 files changed, 1 insertion(+), 185 deletions(-) delete mode 100644 configs/mask2former/mask2former_r101_lsj_8x2_50e_coco.py delete mode 100644 configs/mask2former/mask2former_r50_lsj_8x2_50e_coco.py delete mode 100644 configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py delete mode 100644 configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py diff --git a/configs/mask2former/mask2former_r101_lsj_8x2_50e_coco.py b/configs/mask2former/mask2former_r101_lsj_8x2_50e_coco.py deleted file mode 100644 index 5543fb0ebf9..00000000000 --- a/configs/mask2former/mask2former_r101_lsj_8x2_50e_coco.py +++ /dev/null @@ -1,7 +0,0 @@ -_base_ = ['./mask2former_r50_lsj_8x2_50e_coco.py'] - -model = dict( - backbone=dict( - depth=101, - init_cfg=dict(type='Pretrained', - checkpoint='torchvision://resnet101'))) diff --git a/configs/mask2former/mask2former_r50_lsj_8x2_50e_coco.py b/configs/mask2former/mask2former_r50_lsj_8x2_50e_coco.py deleted file mode 100644 index eca6135ba7c..00000000000 --- a/configs/mask2former/mask2former_r50_lsj_8x2_50e_coco.py +++ /dev/null @@ -1,79 +0,0 @@ -_base_ = ['./mask2former_r50_lsj_8x2_50e_coco-panoptic.py'] -num_things_classes = 80 -num_stuff_classes = 0 -num_classes = num_things_classes + num_stuff_classes -model = dict( - panoptic_head=dict( - num_things_classes=num_things_classes, - num_stuff_classes=num_stuff_classes, - loss_cls=dict(class_weight=[1.0] * num_classes + [0.1])), - panoptic_fusion_head=dict( - num_things_classes=num_things_classes, - num_stuff_classes=num_stuff_classes), - test_cfg=dict(panoptic_on=False)) - -# dataset settings -image_size = (1024, 1024) -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -pad_cfg = dict(img=(128, 128, 128), masks=0, seg=255) -train_pipeline = [ - dict(type='LoadImageFromFile', to_float32=True), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='RandomFlip', flip_ratio=0.5), - # large scale jittering - dict( - type='Resize', - img_scale=image_size, - ratio_range=(0.1, 2.0), - multiscale_mode='range', - keep_ratio=True), - dict( - type='RandomCrop', - crop_size=image_size, - crop_type='absolute', - recompute_bbox=True, - allow_negative_crop=True), - dict(type='FilterAnnotations', min_gt_bbox_wh=(1e-5, 1e-5), by_mask=True), - dict(type='Pad', size=image_size, pad_val=pad_cfg), - dict(type='Normalize', **img_norm_cfg), - dict(type='DefaultFormatBundle', img_to_float=True), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Pad', size_divisor=32, pad_val=pad_cfg), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -data = dict( - _delete_=True, - samples_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(metric=['bbox', 'segm']) diff --git a/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic.py b/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic.py index b2b621ce781..fc3c2952ede 100644 --- a/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic.py +++ b/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic.py @@ -1,4 +1,4 @@ -_base_ = ['./mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic.py'] +_base_ = ['./mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py-panoptic'] pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth' # noqa depths = [2, 2, 18, 2] diff --git a/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py b/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py deleted file mode 100644 index 7b1b05abafe..00000000000 --- a/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py +++ /dev/null @@ -1,37 +0,0 @@ -_base_ = ['./mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py'] -pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth' # noqa - -depths = [2, 2, 18, 2] -model = dict( - backbone=dict( - depths=depths, init_cfg=dict(type='Pretrained', - checkpoint=pretrained))) - -# set all layers in backbone to lr_mult=0.1 -# set all norm layers, position_embeding, -# query_embeding, level_embeding to decay_multi=0.0 -backbone_norm_multi = dict(lr_mult=0.1, decay_mult=0.0) -backbone_embed_multi = dict(lr_mult=0.1, decay_mult=0.0) -embed_multi = dict(lr_mult=1.0, decay_mult=0.0) -custom_keys = { - 'backbone': dict(lr_mult=0.1, decay_mult=1.0), - 'backbone.patch_embed.norm': backbone_norm_multi, - 'backbone.norm': backbone_norm_multi, - 'absolute_pos_embed': backbone_embed_multi, - 'relative_position_bias_table': backbone_embed_multi, - 'query_embed': embed_multi, - 'query_feat': embed_multi, - 'level_embed': embed_multi -} -custom_keys.update({ - f'backbone.stages.{stage_id}.blocks.{block_id}.norm': backbone_norm_multi - for stage_id, num_blocks in enumerate(depths) - for block_id in range(num_blocks) -}) -custom_keys.update({ - f'backbone.stages.{stage_id}.downsample.norm': backbone_norm_multi - for stage_id in range(len(depths) - 1) -}) -# optimizer -optimizer = dict( - paramwise_cfg=dict(custom_keys=custom_keys, norm_decay_mult=0.0)) diff --git a/configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py b/configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py deleted file mode 100644 index 0ccbe91c683..00000000000 --- a/configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py +++ /dev/null @@ -1,61 +0,0 @@ -_base_ = ['./mask2former_r50_lsj_8x2_50e_coco.py'] -pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth' # noqa -depths = [2, 2, 6, 2] -model = dict( - type='Mask2Former', - backbone=dict( - _delete_=True, - type='SwinTransformer', - embed_dims=96, - depths=depths, - num_heads=[3, 6, 12, 24], - window_size=7, - mlp_ratio=4, - qkv_bias=True, - qk_scale=None, - drop_rate=0., - attn_drop_rate=0., - drop_path_rate=0.3, - patch_norm=True, - out_indices=(0, 1, 2, 3), - with_cp=False, - convert_weights=True, - frozen_stages=-1, - init_cfg=dict(type='Pretrained', checkpoint=pretrained)), - panoptic_head=dict( - type='Mask2FormerHead', in_channels=[96, 192, 384, 768]), - init_cfg=None) - -# set all layers in backbone to lr_mult=0.1 -# set all norm layers, position_embeding, -# query_embeding, level_embeding to decay_multi=0.0 -backbone_norm_multi = dict(lr_mult=0.1, decay_mult=0.0) -backbone_embed_multi = dict(lr_mult=0.1, decay_mult=0.0) -embed_multi = dict(lr_mult=1.0, decay_mult=0.0) -custom_keys = { - 'backbone': dict(lr_mult=0.1, decay_mult=1.0), - 'backbone.patch_embed.norm': backbone_norm_multi, - 'backbone.norm': backbone_norm_multi, - 'absolute_pos_embed': backbone_embed_multi, - 'relative_position_bias_table': backbone_embed_multi, - 'query_embed': embed_multi, - 'query_feat': embed_multi, - 'level_embed': embed_multi -} -custom_keys.update({ - f'backbone.stages.{stage_id}.blocks.{block_id}.norm': backbone_norm_multi - for stage_id, num_blocks in enumerate(depths) - for block_id in range(num_blocks) -}) -custom_keys.update({ - f'backbone.stages.{stage_id}.downsample.norm': backbone_norm_multi - for stage_id in range(len(depths) - 1) -}) -# optimizer -optimizer = dict( - type='AdamW', - lr=0.0001, - weight_decay=0.05, - eps=1e-8, - betas=(0.9, 0.999), - paramwise_cfg=dict(custom_keys=custom_keys, norm_decay_mult=0.0)) From 2a2d007796037e8c8b14fef74b051316abd85c04 Mon Sep 17 00:00:00 2001 From: luochunhua Date: Mon, 23 May 2022 14:05:08 +0000 Subject: [PATCH 02/11] add configs for mask2former instance segmentation --- .../mask2former_r101_lsj_8x2_50e_coco.py | 7 ++ .../mask2former_r50_lsj_8x2_50e_coco.py | 79 +++++++++++++++++++ ...ormer_swin-s-p4-w7-224_lsj_8x2_50e_coco.py | 37 +++++++++ ...ormer_swin-t-p4-w7-224_lsj_8x2_50e_coco.py | 61 ++++++++++++++ 4 files changed, 184 insertions(+) create mode 100644 configs/mask2former/mask2former_r101_lsj_8x2_50e_coco.py create mode 100644 configs/mask2former/mask2former_r50_lsj_8x2_50e_coco.py create mode 100644 configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py create mode 100644 configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py diff --git a/configs/mask2former/mask2former_r101_lsj_8x2_50e_coco.py b/configs/mask2former/mask2former_r101_lsj_8x2_50e_coco.py new file mode 100644 index 00000000000..5543fb0ebf9 --- /dev/null +++ b/configs/mask2former/mask2former_r101_lsj_8x2_50e_coco.py @@ -0,0 +1,7 @@ +_base_ = ['./mask2former_r50_lsj_8x2_50e_coco.py'] + +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/mask2former/mask2former_r50_lsj_8x2_50e_coco.py b/configs/mask2former/mask2former_r50_lsj_8x2_50e_coco.py new file mode 100644 index 00000000000..eca6135ba7c --- /dev/null +++ b/configs/mask2former/mask2former_r50_lsj_8x2_50e_coco.py @@ -0,0 +1,79 @@ +_base_ = ['./mask2former_r50_lsj_8x2_50e_coco-panoptic.py'] +num_things_classes = 80 +num_stuff_classes = 0 +num_classes = num_things_classes + num_stuff_classes +model = dict( + panoptic_head=dict( + num_things_classes=num_things_classes, + num_stuff_classes=num_stuff_classes, + loss_cls=dict(class_weight=[1.0] * num_classes + [0.1])), + panoptic_fusion_head=dict( + num_things_classes=num_things_classes, + num_stuff_classes=num_stuff_classes), + test_cfg=dict(panoptic_on=False)) + +# dataset settings +image_size = (1024, 1024) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +pad_cfg = dict(img=(128, 128, 128), masks=0, seg=255) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='RandomFlip', flip_ratio=0.5), + # large scale jittering + dict( + type='Resize', + img_scale=image_size, + ratio_range=(0.1, 2.0), + multiscale_mode='range', + keep_ratio=True), + dict( + type='RandomCrop', + crop_size=image_size, + crop_type='absolute', + recompute_bbox=True, + allow_negative_crop=True), + dict(type='FilterAnnotations', min_gt_bbox_wh=(1e-5, 1e-5), by_mask=True), + dict(type='Pad', size=image_size, pad_val=pad_cfg), + dict(type='Normalize', **img_norm_cfg), + dict(type='DefaultFormatBundle', img_to_float=True), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Pad', size_divisor=32, pad_val=pad_cfg), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +data = dict( + _delete_=True, + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) +evaluation = dict(metric=['bbox', 'segm']) diff --git a/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py b/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py new file mode 100644 index 00000000000..7b1b05abafe --- /dev/null +++ b/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py @@ -0,0 +1,37 @@ +_base_ = ['./mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py'] +pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth' # noqa + +depths = [2, 2, 18, 2] +model = dict( + backbone=dict( + depths=depths, init_cfg=dict(type='Pretrained', + checkpoint=pretrained))) + +# set all layers in backbone to lr_mult=0.1 +# set all norm layers, position_embeding, +# query_embeding, level_embeding to decay_multi=0.0 +backbone_norm_multi = dict(lr_mult=0.1, decay_mult=0.0) +backbone_embed_multi = dict(lr_mult=0.1, decay_mult=0.0) +embed_multi = dict(lr_mult=1.0, decay_mult=0.0) +custom_keys = { + 'backbone': dict(lr_mult=0.1, decay_mult=1.0), + 'backbone.patch_embed.norm': backbone_norm_multi, + 'backbone.norm': backbone_norm_multi, + 'absolute_pos_embed': backbone_embed_multi, + 'relative_position_bias_table': backbone_embed_multi, + 'query_embed': embed_multi, + 'query_feat': embed_multi, + 'level_embed': embed_multi +} +custom_keys.update({ + f'backbone.stages.{stage_id}.blocks.{block_id}.norm': backbone_norm_multi + for stage_id, num_blocks in enumerate(depths) + for block_id in range(num_blocks) +}) +custom_keys.update({ + f'backbone.stages.{stage_id}.downsample.norm': backbone_norm_multi + for stage_id in range(len(depths) - 1) +}) +# optimizer +optimizer = dict( + paramwise_cfg=dict(custom_keys=custom_keys, norm_decay_mult=0.0)) diff --git a/configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py b/configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py new file mode 100644 index 00000000000..0ccbe91c683 --- /dev/null +++ b/configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py @@ -0,0 +1,61 @@ +_base_ = ['./mask2former_r50_lsj_8x2_50e_coco.py'] +pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth' # noqa +depths = [2, 2, 6, 2] +model = dict( + type='Mask2Former', + backbone=dict( + _delete_=True, + type='SwinTransformer', + embed_dims=96, + depths=depths, + num_heads=[3, 6, 12, 24], + window_size=7, + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.3, + patch_norm=True, + out_indices=(0, 1, 2, 3), + with_cp=False, + convert_weights=True, + frozen_stages=-1, + init_cfg=dict(type='Pretrained', checkpoint=pretrained)), + panoptic_head=dict( + type='Mask2FormerHead', in_channels=[96, 192, 384, 768]), + init_cfg=None) + +# set all layers in backbone to lr_mult=0.1 +# set all norm layers, position_embeding, +# query_embeding, level_embeding to decay_multi=0.0 +backbone_norm_multi = dict(lr_mult=0.1, decay_mult=0.0) +backbone_embed_multi = dict(lr_mult=0.1, decay_mult=0.0) +embed_multi = dict(lr_mult=1.0, decay_mult=0.0) +custom_keys = { + 'backbone': dict(lr_mult=0.1, decay_mult=1.0), + 'backbone.patch_embed.norm': backbone_norm_multi, + 'backbone.norm': backbone_norm_multi, + 'absolute_pos_embed': backbone_embed_multi, + 'relative_position_bias_table': backbone_embed_multi, + 'query_embed': embed_multi, + 'query_feat': embed_multi, + 'level_embed': embed_multi +} +custom_keys.update({ + f'backbone.stages.{stage_id}.blocks.{block_id}.norm': backbone_norm_multi + for stage_id, num_blocks in enumerate(depths) + for block_id in range(num_blocks) +}) +custom_keys.update({ + f'backbone.stages.{stage_id}.downsample.norm': backbone_norm_multi + for stage_id in range(len(depths) - 1) +}) +# optimizer +optimizer = dict( + type='AdamW', + lr=0.0001, + weight_decay=0.05, + eps=1e-8, + betas=(0.9, 0.999), + paramwise_cfg=dict(custom_keys=custom_keys, norm_decay_mult=0.0)) From d421451880bda895ffb8f6b3c24216e4f3039a74 Mon Sep 17 00:00:00 2001 From: luochunhua Date: Mon, 23 May 2022 14:06:09 +0000 Subject: [PATCH 03/11] rename panoptic seg related and add instance seg related --- configs/mask2former/README.md | 30 +++++-- configs/mask2former/metafile.yml | 144 ++++++++++++++++++++++--------- 2 files changed, 125 insertions(+), 49 deletions(-) diff --git a/configs/mask2former/README.md b/configs/mask2former/README.md index 36be8dbf498..e2c4664f019 100644 --- a/configs/mask2former/README.md +++ b/configs/mask2former/README.md @@ -38,15 +38,27 @@ mmdetection ## Results and Models -| Backbone | style | Pretrain | Lr schd | Mem (GB) | Inf time (fps) | PQ | box mAP | mask mAP | Config | Download | -| :------: | :-----: | :----------: | :-----: | :------: | :------------: | :--: | :-----: | :------: | :-------------------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| R-50 | pytorch | ImageNet-1K | 50e | 13.9 | - | 51.9 | 44.8 | 41.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_r50_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco/mask2former_r50_lsj_8x2_50e_coco_20220326_224516-0091ce2b.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco/mask2former_r50_lsj_8x2_50e_coco_20220326_224516.log.json) | -| R-101 | pytorch | ImageNet-1K | 50e | 16.1 | - | 52.4 | 45.3 | 42.4 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_r101_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco/mask2former_r101_lsj_8x2_50e_coco_20220329_225104-bb4df090.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco/mask2former_r101_lsj_8x2_50e_coco_20220329_225104.log.json) | -| Swin-T | - | ImageNet-1K | 50e | 15.9 | - | 53.4 | 46.3 | 43.4 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco_20220326_224553-c92f921c.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco_20220326_224553.log.json) | -| Swin-S | - | ImageNet-1K | 50e | 19.1 | - | 54.5 | 47.8 | 44.5 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220329_225200-9f633bcf.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220329_225200.log.json) | -| Swin-B | - | ImageNet-1K | 50e | 26.0 | - | 55.1 | 48.2 | 44.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco_20220331_002244-1db756b2.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco_20220331_002244.log.json) | -| Swin-B | - | ImageNet-21K | 50e | 25.8 | - | 56.3 | 50.0 | 46.3 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco_20220329_230021-89d7c1b1.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco_20220329_230021.log.json) | -| Swin-L | - | ImageNet-21K | 100e | 21.1 | - | 57.6 | 52.2 | 48.5 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco_20220407_104949-c481ee28.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco_20220407_104949.log.json) | +### Panoptic segmentation +| Backbone | style | Pretrain | Lr schd | Mem (GB) | Inf time (fps) | PQ | box mAP | mask mAP | Config | Download | +| :------: | :-----: | :----------: | :-----: | :------: | :------------: | :---: | :-----: | :------: | :----------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50 | pytorch | ImageNet-1K | 50e | 13.9 | - | 51.9 | 44.8 | 41.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_r50_lsj_8x2_50e_coco-panoptic.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco-panoptic/mask2former_r50_lsj_8x2_50e_coco-panoptic_20220326_224516-11a44721.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco-panoptic/mask2former_r50_lsj_8x2_50e_coco-panoptic_20220326_224516.log.json) | +| R-101 | pytorch | ImageNet-1K | 50e | 16.1 | - | 52.4 | 45.3 | 42.4 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_r101_lsj_8x2_50e_coco-panoptic.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco-panoptic/mask2former_r101_lsj_8x2_50e_coco-panoptic_20220329_225104-c54e64c9.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco-panoptic/mask2former_r101_lsj_8x2_50e_coco-panoptic_20220329_225104.log.json) | +| Swin-T | - | ImageNet-1K | 50e | 15.9 | - | 53.4 | 46.3 | 43.4 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic_20220326_224553-fc567107.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic_20220326_224553.log.json) | +| Swin-S | - | ImageNet-1K | 50e | 19.1 | - | 54.5 | 47.8 | 44.5 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic_20220329_225200-c7b94355.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic_20220329_225200.log.json) | +| Swin-B | - | ImageNet-1K | 50e | 26.0 | - | 55.1 | 48.2 | 44.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic_20220331_002244-c149a9e9.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic_20220331_002244.log.json) | +| Swin-B | - | ImageNet-21K | 50e | 25.8 | - | 56.3 | 50.0 | 46.3 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic_20220329_230021-3bb8b482.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic_20220329_230021.log.json) | +| Swin-L | - | ImageNet-21K | 100e | 21.1 | - | 57.6 | 52.2 | 48.5 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic_20220407_104949-d4919c44.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic_20220407_104949.log.json) | + +### Instance segmentation +| Backbone | style | Pretrain | Lr schd | Mem (GB) | Inf time (fps) | box mAP | mask mAP | Config | Download | +| -------- | ------- | ----------- | ------- | -------- | -------------- | ------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| R-50 | pytorch | ImageNet-1K | 50e | 13.7 | - | 45.7 | 42.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_r50_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco/mask2former_r50_lsj_8x2_50e_coco_20220506_191028-8e96e88b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco/mask2former_r50_lsj_8x2_50e_coco_20220506_191028.log.json) | +| R-101 | pytorch | ImageNet-1K | 50e | 15.5 | - | 46.7 | 44.0 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_r101_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco/mask2former_r101_lsj_8x2_50e_coco_20220426_100250-c50b6fa6.pth) | [log](ttps://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco/mask2former_r101_lsj_8x2_50e_coco_20220426_100250.log.json) | +| Swin-T | - | ImageNet-1K | 50e | 15.3 | - | 47.7 | 44.7 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco_20220508_091649-4a943037.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco_20220508_091649.log.json) | +| Swin-S | - | ImageNet-1K | 50e | 18.8 | - | 49.3 | 46.1 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756-743b7d99.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756.log.json) | + +Note: We have trained the above models(instance segmentation) many times with the algorithm we implemented (see [PR 7571](https://github.com/open-mmlab/mmdetection/pull/7571)), and the performance of the trained models is relatively stable (+- 0.2), and there is a certain gap between the performance of the models we trained and the results mentioned in the [paper](http://arxiv.org/abs/2112.01527). However, the performance of the model trained with the official code are unstable, and the performance may be lower than the target results (see [issue](https://github.com/facebookresearch/Mask2Former/issues/46)). + ## Citation diff --git a/configs/mask2former/metafile.yml b/configs/mask2former/metafile.yml index 2ceed8056af..d9f469292c2 100644 --- a/configs/mask2former/metafile.yml +++ b/configs/mask2former/metafile.yml @@ -17,29 +17,45 @@ Collections: Version: v2.23.0 Models: -- Name: mask2former_r50_lsj_8x2_50e_coco +- Name: mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic In Collection: Mask2Former - Config: configs/mask2former/mask2former_r50_lsj_8x2_50e_coco.py + Config: configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic.py Metadata: - Training Memory (GB): 13.9 + Training Memory (GB): 19.1 Iterations: 368750 Results: - Task: Object Detection Dataset: COCO Metrics: - box AP: 44.8 + box AP: 47.8 - Task: Instance Segmentation Dataset: COCO Metrics: - mask AP: 41.9 + mask AP: 44.5 - Task: Panoptic Segmentation Dataset: COCO Metrics: - PQ: 51.9 - Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco/mask2former_r50_lsj_8x2_50e_coco_20220326_224516-0091ce2b.pth + PQ: 54.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic_20220329_225200-c7b94355.pth - Name: mask2former_r101_lsj_8x2_50e_coco In Collection: Mask2Former Config: configs/mask2former/mask2former_r101_lsj_8x2_50e_coco.py + Metadata: + Training Memory (GB): 15.5 + Iterations: 368750 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 46.7 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 44.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco/mask2former_r101_lsj_8x2_50e_coco_20220426_100250-c50b6fa6.pth +- Name: mask2former_r101_lsj_8x2_50e_coco-panoptic + In Collection: Mask2Former + Config: configs/mask2former/mask2former_r101_lsj_8x2_50e_coco-panoptic.py Metadata: Training Memory (GB): 16.1 Iterations: 368750 @@ -56,70 +72,86 @@ Models: Dataset: COCO Metrics: PQ: 52.4 - Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco/mask2former_r101_lsj_8x2_50e_coco_20220329_225104-bb4df090.pth -- Name: mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco-panoptic/mask2former_r101_lsj_8x2_50e_coco-panoptic_20220329_225104-c54e64c9.pth +- Name: mask2former_r50_lsj_8x2_50e_coco-panoptic In Collection: Mask2Former - Config: configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py + Config: configs/mask2former/mask2former_r50_lsj_8x2_50e_coco-panoptic.py Metadata: - Training Memory (GB): 15.9 + Training Memory (GB): 13.9 Iterations: 368750 Results: - Task: Object Detection Dataset: COCO Metrics: - box AP: 46.3 + box AP: 44.8 - Task: Instance Segmentation Dataset: COCO Metrics: - mask AP: 43.4 + mask AP: 41.9 - Task: Panoptic Segmentation Dataset: COCO Metrics: - PQ: 53.4 - Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco_20220326_224553-c92f921c.pth -- Name: mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco + PQ: 51.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco-panoptic/mask2former_r50_lsj_8x2_50e_coco-panoptic_20220326_224516-11a44721.pth +- Name: mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic In Collection: Mask2Former - Config: configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py + Config: configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic.py Metadata: - Training Memory (GB): 19.1 + Training Memory (GB): 15.9 Iterations: 368750 Results: - Task: Object Detection Dataset: COCO Metrics: - box AP: 47.8 + box AP: 46.3 - Task: Instance Segmentation Dataset: COCO Metrics: - mask AP: 44.5 + mask AP: 43.4 - Task: Panoptic Segmentation Dataset: COCO Metrics: - PQ: 54.5 - Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220329_225200-9f633bcf.pth -- Name: mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco + PQ: 53.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic_20220326_224553-fc567107.pth +- Name: mask2former_r50_lsj_8x2_50e_coco In Collection: Mask2Former - Config: configs/mask2former/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco.py + Config: configs/mask2former/mask2former_r50_lsj_8x2_50e_coco.py Metadata: - Training Memory (GB): 26.0 + Training Memory (GB): 13.7 Iterations: 368750 Results: - Task: Object Detection Dataset: COCO Metrics: - box AP: 48.2 + box AP: 45.7 - Task: Instance Segmentation Dataset: COCO Metrics: - mask AP: 44.9 + mask AP: 42.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco/mask2former_r50_lsj_8x2_50e_coco_20220506_191028-8e96e88b.pth +- Name: mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic + In Collection: Mask2Former + Config: configs/mask2former/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic.py + Metadata: + Training Memory (GB): 21.1 + Iterations: 737500 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 52.2 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 48.5 - Task: Panoptic Segmentation Dataset: COCO Metrics: - PQ: 55.1 - Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco_20220331_002244-1db756b2.pth -- Name: mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco + PQ: 57.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic_20220407_104949-d4919c44.pth +- Name: mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic In Collection: Mask2Former - Config: configs/mask2former/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco.py + Config: configs/mask2former/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic.py Metadata: Training Memory (GB): 25.8 Iterations: 368750 @@ -136,24 +168,56 @@ Models: Dataset: COCO Metrics: PQ: 56.3 - Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco_20220329_230021-89d7c1b1.pth -- Name: mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic_20220329_230021-3bb8b482.pth +- Name: mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic In Collection: Mask2Former - Config: configs/mask2former/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco.py + Config: configs/mask2former/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic.py Metadata: - Training Memory (GB): 21.1 - Iterations: 737500 + Training Memory (GB): 26.0 + Iterations: 368750 Results: - Task: Object Detection Dataset: COCO Metrics: - box AP: 52.2 + box AP: 48.2 - Task: Instance Segmentation Dataset: COCO Metrics: - mask AP: 48.5 + mask AP: 44.9 - Task: Panoptic Segmentation Dataset: COCO Metrics: - PQ: 57.6 - Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco_20220407_104949-c481ee28.pth + PQ: 55.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic_20220331_002244-c149a9e9.pth +- Name: mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco + In Collection: Mask2Former + Config: configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py + Metadata: + Training Memory (GB): 15.3 + Iterations: 368750 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 47.7 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 44.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco_20220508_091649-4a943037.pth +- Name: mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco + In Collection: Mask2Former + Config: configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py + Metadata: + Training Memory (GB): 18.8 + Iterations: 368750 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 49.3 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 46.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756-743b7d99.pth From af049e1ffcfdf104e8b779c37796e6d2f6b570cb Mon Sep 17 00:00:00 2001 From: luochunhua Date: Mon, 23 May 2022 14:31:33 +0000 Subject: [PATCH 04/11] update readme --- configs/mask2former/README.md | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/configs/mask2former/README.md b/configs/mask2former/README.md index e2c4664f019..d085f3dc404 100644 --- a/configs/mask2former/README.md +++ b/configs/mask2former/README.md @@ -39,26 +39,27 @@ mmdetection ## Results and Models ### Panoptic segmentation -| Backbone | style | Pretrain | Lr schd | Mem (GB) | Inf time (fps) | PQ | box mAP | mask mAP | Config | Download | -| :------: | :-----: | :----------: | :-----: | :------: | :------------: | :---: | :-----: | :------: | :----------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| R-50 | pytorch | ImageNet-1K | 50e | 13.9 | - | 51.9 | 44.8 | 41.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_r50_lsj_8x2_50e_coco-panoptic.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco-panoptic/mask2former_r50_lsj_8x2_50e_coco-panoptic_20220326_224516-11a44721.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco-panoptic/mask2former_r50_lsj_8x2_50e_coco-panoptic_20220326_224516.log.json) | -| R-101 | pytorch | ImageNet-1K | 50e | 16.1 | - | 52.4 | 45.3 | 42.4 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_r101_lsj_8x2_50e_coco-panoptic.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco-panoptic/mask2former_r101_lsj_8x2_50e_coco-panoptic_20220329_225104-c54e64c9.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco-panoptic/mask2former_r101_lsj_8x2_50e_coco-panoptic_20220329_225104.log.json) | -| Swin-T | - | ImageNet-1K | 50e | 15.9 | - | 53.4 | 46.3 | 43.4 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic_20220326_224553-fc567107.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic_20220326_224553.log.json) | -| Swin-S | - | ImageNet-1K | 50e | 19.1 | - | 54.5 | 47.8 | 44.5 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic_20220329_225200-c7b94355.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic_20220329_225200.log.json) | -| Swin-B | - | ImageNet-1K | 50e | 26.0 | - | 55.1 | 48.2 | 44.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic_20220331_002244-c149a9e9.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic_20220331_002244.log.json) | -| Swin-B | - | ImageNet-21K | 50e | 25.8 | - | 56.3 | 50.0 | 46.3 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic_20220329_230021-3bb8b482.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic_20220329_230021.log.json) | -| Swin-L | - | ImageNet-21K | 100e | 21.1 | - | 57.6 | 52.2 | 48.5 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic_20220407_104949-d4919c44.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic_20220407_104949.log.json) | + +| Backbone | style | Pretrain | Lr schd | Mem (GB) | Inf time (fps) | PQ | box mAP | mask mAP | Config | Download | +| :------: | :-----: | :----------: | :-----: | :------: | :------------: | :--: | :-----: | :------: | :----------------------------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50 | pytorch | ImageNet-1K | 50e | 13.9 | - | 51.9 | 44.8 | 41.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_r50_lsj_8x2_50e_coco-panoptic.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco-panoptic/mask2former_r50_lsj_8x2_50e_coco-panoptic_20220326_224516-11a44721.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco-panoptic/mask2former_r50_lsj_8x2_50e_coco-panoptic_20220326_224516.log.json) | +| R-101 | pytorch | ImageNet-1K | 50e | 16.1 | - | 52.4 | 45.3 | 42.4 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_r101_lsj_8x2_50e_coco-panoptic.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco-panoptic/mask2former_r101_lsj_8x2_50e_coco-panoptic_20220329_225104-c54e64c9.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco-panoptic/mask2former_r101_lsj_8x2_50e_coco-panoptic_20220329_225104.log.json) | +| Swin-T | - | ImageNet-1K | 50e | 15.9 | - | 53.4 | 46.3 | 43.4 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic_20220326_224553-fc567107.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic_20220326_224553.log.json) | +| Swin-S | - | ImageNet-1K | 50e | 19.1 | - | 54.5 | 47.8 | 44.5 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic_20220329_225200-c7b94355.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic_20220329_225200.log.json) | +| Swin-B | - | ImageNet-1K | 50e | 26.0 | - | 55.1 | 48.2 | 44.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic_20220331_002244-c149a9e9.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic_20220331_002244.log.json) | +| Swin-B | - | ImageNet-21K | 50e | 25.8 | - | 56.3 | 50.0 | 46.3 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic_20220329_230021-3bb8b482.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic_20220329_230021.log.json) | +| Swin-L | - | ImageNet-21K | 100e | 21.1 | - | 57.6 | 52.2 | 48.5 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic_20220407_104949-d4919c44.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic_20220407_104949.log.json) | ### Instance segmentation -| Backbone | style | Pretrain | Lr schd | Mem (GB) | Inf time (fps) | box mAP | mask mAP | Config | Download | -| -------- | ------- | ----------- | ------- | -------- | -------------- | ------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| R-50 | pytorch | ImageNet-1K | 50e | 13.7 | - | 45.7 | 42.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_r50_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco/mask2former_r50_lsj_8x2_50e_coco_20220506_191028-8e96e88b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco/mask2former_r50_lsj_8x2_50e_coco_20220506_191028.log.json) | -| R-101 | pytorch | ImageNet-1K | 50e | 15.5 | - | 46.7 | 44.0 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_r101_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco/mask2former_r101_lsj_8x2_50e_coco_20220426_100250-c50b6fa6.pth) | [log](ttps://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco/mask2former_r101_lsj_8x2_50e_coco_20220426_100250.log.json) | -| Swin-T | - | ImageNet-1K | 50e | 15.3 | - | 47.7 | 44.7 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco_20220508_091649-4a943037.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco_20220508_091649.log.json) | -| Swin-S | - | ImageNet-1K | 50e | 18.8 | - | 49.3 | 46.1 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756-743b7d99.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756.log.json) | -Note: We have trained the above models(instance segmentation) many times with the algorithm we implemented (see [PR 7571](https://github.com/open-mmlab/mmdetection/pull/7571)), and the performance of the trained models is relatively stable (+- 0.2), and there is a certain gap between the performance of the models we trained and the results mentioned in the [paper](http://arxiv.org/abs/2112.01527). However, the performance of the model trained with the official code are unstable, and the performance may be lower than the target results (see [issue](https://github.com/facebookresearch/Mask2Former/issues/46)). +| Backbone | style | Pretrain | Lr schd | Mem (GB) | Inf time (fps) | box mAP | mask mAP | Config | Download | +| -------- | ------- | ----------- | ------- | -------- | -------------- | ------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| R-50 | pytorch | ImageNet-1K | 50e | 13.7 | - | 45.7 | 42.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_r50_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco/mask2former_r50_lsj_8x2_50e_coco_20220506_191028-8e96e88b.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco/mask2former_r50_lsj_8x2_50e_coco_20220506_191028.log.json) | +| R-101 | pytorch | ImageNet-1K | 50e | 15.5 | - | 46.7 | 44.0 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_r101_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco/mask2former_r101_lsj_8x2_50e_coco_20220426_100250-c50b6fa6.pth) \| [log](ttps://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco/mask2former_r101_lsj_8x2_50e_coco_20220426_100250.log.json) | +| Swin-T | - | ImageNet-1K | 50e | 15.3 | - | 47.7 | 44.7 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco_20220508_091649-4a943037.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco_20220508_091649.log.json) | +| Swin-S | - | ImageNet-1K | 50e | 18.8 | - | 49.3 | 46.1 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756-743b7d99.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756.log.json) | +Note: We have trained the above models(instance segmentation) many times with the algorithm we implemented (see [PR 7571](https://github.com/open-mmlab/mmdetection/pull/7571)), and the performance of the trained models is relatively stable (+- 0.2), and there is a certain gap between the performance of the models we trained and the results mentioned in the [paper](http://arxiv.org/abs/2112.01527). However, the performance of the model trained with the official code is unstable, and the performance may be lower than the target results (see [issue](https://github.com/facebookresearch/Mask2Former/issues/46)). ## Citation From e5146c3ca75244aea979d592a0a0e552d9206097 Mon Sep 17 00:00:00 2001 From: luochunhua Date: Mon, 23 May 2022 14:32:41 +0000 Subject: [PATCH 05/11] fix bug in gather_model.py --- .dev_scripts/gather_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.dev_scripts/gather_models.py b/.dev_scripts/gather_models.py index fe2a15f9521..42e615c7154 100644 --- a/.dev_scripts/gather_models.py +++ b/.dev_scripts/gather_models.py @@ -269,7 +269,7 @@ def main(): # when using Panoptic Dataset, the evaluation key is 'PQ'. for i, key in enumerate(results_lut): if 'mAP' not in key and 'PQ' not in key: - results_lut[i] = key + 'm_AP' + results_lut[i] = key + '_mAP' model_performance = get_final_results(log_json_path, final_epoch_or_iter, results_lut, by_epoch) From 81d669270135e99be4203034e4c697766186b9a5 Mon Sep 17 00:00:00 2001 From: luochunhua Date: Wed, 25 May 2022 02:40:21 +0000 Subject: [PATCH 06/11] fix link --- configs/mask2former/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/mask2former/README.md b/configs/mask2former/README.md index d085f3dc404..02d4e09ecb1 100644 --- a/configs/mask2former/README.md +++ b/configs/mask2former/README.md @@ -55,7 +55,7 @@ mmdetection | Backbone | style | Pretrain | Lr schd | Mem (GB) | Inf time (fps) | box mAP | mask mAP | Config | Download | | -------- | ------- | ----------- | ------- | -------- | -------------- | ------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | R-50 | pytorch | ImageNet-1K | 50e | 13.7 | - | 45.7 | 42.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_r50_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco/mask2former_r50_lsj_8x2_50e_coco_20220506_191028-8e96e88b.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco/mask2former_r50_lsj_8x2_50e_coco_20220506_191028.log.json) | -| R-101 | pytorch | ImageNet-1K | 50e | 15.5 | - | 46.7 | 44.0 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_r101_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco/mask2former_r101_lsj_8x2_50e_coco_20220426_100250-c50b6fa6.pth) \| [log](ttps://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco/mask2former_r101_lsj_8x2_50e_coco_20220426_100250.log.json) | +| R-101 | pytorch | ImageNet-1K | 50e | 15.5 | - | 46.7 | 44.0 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_r101_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco/mask2former_r101_lsj_8x2_50e_coco_20220426_100250-c50b6fa6.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco/mask2former_r101_lsj_8x2_50e_coco_20220426_100250.log.json) | | Swin-T | - | ImageNet-1K | 50e | 15.3 | - | 47.7 | 44.7 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco_20220508_091649-4a943037.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco_20220508_091649.log.json) | | Swin-S | - | ImageNet-1K | 50e | 18.8 | - | 49.3 | 46.1 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756-743b7d99.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756.log.json) | From 19d75126b880a1248db9cb7c585fe6f86994c194 Mon Sep 17 00:00:00 2001 From: luochunhua Date: Wed, 25 May 2022 02:42:36 +0000 Subject: [PATCH 07/11] fix link --- configs/mask2former/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/mask2former/README.md b/configs/mask2former/README.md index 02d4e09ecb1..3b2f7d66d64 100644 --- a/configs/mask2former/README.md +++ b/configs/mask2former/README.md @@ -55,7 +55,7 @@ mmdetection | Backbone | style | Pretrain | Lr schd | Mem (GB) | Inf time (fps) | box mAP | mask mAP | Config | Download | | -------- | ------- | ----------- | ------- | -------- | -------------- | ------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | R-50 | pytorch | ImageNet-1K | 50e | 13.7 | - | 45.7 | 42.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_r50_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco/mask2former_r50_lsj_8x2_50e_coco_20220506_191028-8e96e88b.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco/mask2former_r50_lsj_8x2_50e_coco_20220506_191028.log.json) | -| R-101 | pytorch | ImageNet-1K | 50e | 15.5 | - | 46.7 | 44.0 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_r101_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco/mask2former_r101_lsj_8x2_50e_coco_20220426_100250-c50b6fa6.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco/mask2former_r101_lsj_8x2_50e_coco_20220426_100250.log.json) | +| R-101 | pytorch | ImageNet-1K | 50e | 15.5 | - | 46.7 | 44.0 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_r101_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco/mask2former_r101_lsj_8x2_50e_coco_20220426_100250-c50b6fa6.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco/mask2former_r101_lsj_8x2_50e_coco_20220426_100250.log.json) | | Swin-T | - | ImageNet-1K | 50e | 15.3 | - | 47.7 | 44.7 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco_20220508_091649-4a943037.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco_20220508_091649.log.json) | | Swin-S | - | ImageNet-1K | 50e | 18.8 | - | 49.3 | 46.1 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756-743b7d99.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756.log.json) | From 1d74defd0d5c43fa8fbc3cf5b5878feb83bf1dee Mon Sep 17 00:00:00 2001 From: luochunhua Date: Fri, 27 May 2022 08:34:19 +0000 Subject: [PATCH 08/11] update readme --- configs/mask2former/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/mask2former/README.md b/configs/mask2former/README.md index 3b2f7d66d64..20f91a5bda5 100644 --- a/configs/mask2former/README.md +++ b/configs/mask2former/README.md @@ -59,7 +59,7 @@ mmdetection | Swin-T | - | ImageNet-1K | 50e | 15.3 | - | 47.7 | 44.7 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco_20220508_091649-4a943037.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco_20220508_091649.log.json) | | Swin-S | - | ImageNet-1K | 50e | 18.8 | - | 49.3 | 46.1 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756-743b7d99.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756.log.json) | -Note: We have trained the above models(instance segmentation) many times with the algorithm we implemented (see [PR 7571](https://github.com/open-mmlab/mmdetection/pull/7571)), and the performance of the trained models is relatively stable (+- 0.2), and there is a certain gap between the performance of the models we trained and the results mentioned in the [paper](http://arxiv.org/abs/2112.01527). However, the performance of the model trained with the official code is unstable, and the performance may be lower than the target results (see [issue](https://github.com/facebookresearch/Mask2Former/issues/46)). +Note: We have trained the instance segmentation models many times (see more details in [PR 7571](https://github.com/open-mmlab/mmdetection/pull/7571)). The results of the trained models are relatively stable (+- 0.2), and have a certain gap in comparison with the results in the [paper](http://arxiv.org/abs/2112.01527). However, the performance of the model trained with the official code is unstable and may also be slightly lower than the reported results as mentioned in the [issue](https://github.com/facebookresearch/Mask2Former/issues/46). ## Citation From 9121ccecb869969e2dbc9fb8378908514cb8ca1b Mon Sep 17 00:00:00 2001 From: luochunhua Date: Fri, 27 May 2022 08:41:17 +0000 Subject: [PATCH 09/11] update config --- .../mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic.py b/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic.py index fc3c2952ede..b2b621ce781 100644 --- a/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic.py +++ b/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic.py @@ -1,4 +1,4 @@ -_base_ = ['./mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py-panoptic'] +_base_ = ['./mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic.py'] pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth' # noqa depths = [2, 2, 18, 2] From 161a5317a9f5c0a63282e2d85f18530222ed189c Mon Sep 17 00:00:00 2001 From: luochunhua Date: Fri, 27 May 2022 08:46:07 +0000 Subject: [PATCH 10/11] update readme --- configs/mask2former/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/mask2former/README.md b/configs/mask2former/README.md index 20f91a5bda5..2dc828b3641 100644 --- a/configs/mask2former/README.md +++ b/configs/mask2former/README.md @@ -59,7 +59,7 @@ mmdetection | Swin-T | - | ImageNet-1K | 50e | 15.3 | - | 47.7 | 44.7 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco_20220508_091649-4a943037.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco_20220508_091649.log.json) | | Swin-S | - | ImageNet-1K | 50e | 18.8 | - | 49.3 | 46.1 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756-743b7d99.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756.log.json) | -Note: We have trained the instance segmentation models many times (see more details in [PR 7571](https://github.com/open-mmlab/mmdetection/pull/7571)). The results of the trained models are relatively stable (+- 0.2), and have a certain gap in comparison with the results in the [paper](http://arxiv.org/abs/2112.01527). However, the performance of the model trained with the official code is unstable and may also be slightly lower than the reported results as mentioned in the [issue](https://github.com/facebookresearch/Mask2Former/issues/46). +Note: We have trained the instance segmentation models many times (see more details in [PR 7571](https://github.com/open-mmlab/mmdetection/pull/7571)). The results of the trained models are relatively stable (+- 0.2), and and have a certain gap (about 0.2 AP) in comparison with the results in the [paper](http://arxiv.org/abs/2112.01527). However, the performance of the model trained with the official code is unstable and may also be slightly lower than the reported results as mentioned in the [issue](https://github.com/facebookresearch/Mask2Former/issues/46). ## Citation From f8c575fecaf0a10ac573fa29061994ef7133fecb Mon Sep 17 00:00:00 2001 From: luochunhua Date: Fri, 27 May 2022 08:49:37 +0000 Subject: [PATCH 11/11] update readme --- configs/mask2former/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/mask2former/README.md b/configs/mask2former/README.md index 2dc828b3641..ebce50d84b5 100644 --- a/configs/mask2former/README.md +++ b/configs/mask2former/README.md @@ -59,7 +59,7 @@ mmdetection | Swin-T | - | ImageNet-1K | 50e | 15.3 | - | 47.7 | 44.7 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco_20220508_091649-4a943037.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco_20220508_091649.log.json) | | Swin-S | - | ImageNet-1K | 50e | 18.8 | - | 49.3 | 46.1 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756-743b7d99.pth) \| [log](https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756.log.json) | -Note: We have trained the instance segmentation models many times (see more details in [PR 7571](https://github.com/open-mmlab/mmdetection/pull/7571)). The results of the trained models are relatively stable (+- 0.2), and and have a certain gap (about 0.2 AP) in comparison with the results in the [paper](http://arxiv.org/abs/2112.01527). However, the performance of the model trained with the official code is unstable and may also be slightly lower than the reported results as mentioned in the [issue](https://github.com/facebookresearch/Mask2Former/issues/46). +Note: We have trained the instance segmentation models many times (see more details in [PR 7571](https://github.com/open-mmlab/mmdetection/pull/7571)). The results of the trained models are relatively stable (+- 0.2), and have a certain gap (about 0.2 AP) in comparison with the results in the [paper](http://arxiv.org/abs/2112.01527). However, the performance of the model trained with the official code is unstable and may also be slightly lower than the reported results as mentioned in the [issue](https://github.com/facebookresearch/Mask2Former/issues/46). ## Citation