diff --git a/configs/_base_/datasets/lapa.py b/configs/_base_/datasets/lapa.py new file mode 100644 index 0000000000..26a0843404 --- /dev/null +++ b/configs/_base_/datasets/lapa.py @@ -0,0 +1,688 @@ +dataset_info = dict( + dataset_name='lapa', + paper_info=dict( + author='Liu, Yinglu and Shi, Hailin and Shen, Hao and Si, ' + 'Yue and Wang, Xiaobo and Mei, Tao', + title='A New Dataset and Boundary-Attention Semantic ' + 'Segmentation for Face Parsing.', + container='Proceedings of the AAAI Conference on ' + 'Artificial Intelligence 2020', + year='2020', + homepage='https://github.com/JDAI-CV/lapa-dataset', + ), + keypoint_info={ + 0: + dict( + name='kpt-0', id=0, color=[255, 0, 0], type='upper', + swap='kpt-32'), + 1: + dict( + name='kpt-1', id=1, color=[255, 0, 0], type='upper', + swap='kpt-31'), + 2: + dict( + name='kpt-2', id=2, color=[255, 0, 0], type='upper', + swap='kpt-30'), + 3: + dict( + name='kpt-3', id=3, color=[255, 0, 0], type='lower', + swap='kpt-29'), + 4: + dict( + name='kpt-4', id=4, color=[255, 0, 0], type='lower', + swap='kpt-28'), + 5: + dict( + name='kpt-5', id=5, color=[255, 0, 0], type='lower', + swap='kpt-27'), + 6: + dict( + name='kpt-6', id=6, color=[255, 0, 0], type='lower', + swap='kpt-26'), + 7: + dict( + name='kpt-7', id=7, color=[255, 0, 0], type='lower', + swap='kpt-25'), + 8: + dict( + name='kpt-8', id=8, color=[255, 0, 0], type='lower', + swap='kpt-24'), + 9: + dict( + name='kpt-9', id=9, color=[255, 0, 0], type='lower', + swap='kpt-23'), + 10: + dict( + name='kpt-10', + id=10, + color=[255, 0, 0], + type='lower', + swap='kpt-22'), + 11: + dict( + name='kpt-11', + id=11, + color=[255, 0, 0], + type='lower', + swap='kpt-21'), + 12: + dict( + name='kpt-12', + id=12, + color=[255, 0, 0], + type='lower', + swap='kpt-20'), + 13: + dict( + name='kpt-13', + id=13, + color=[255, 0, 0], + type='lower', + swap='kpt-19'), + 14: + dict( + name='kpt-14', + id=14, + color=[255, 0, 0], + type='lower', + swap='kpt-18'), + 15: + dict( + 
name='kpt-15', + id=15, + color=[255, 0, 0], + type='lower', + swap='kpt-17'), + 16: + dict(name='kpt-16', id=16, color=[255, 0, 0], type='lower', swap=''), + 17: + dict( + name='kpt-17', + id=17, + color=[255, 0, 0], + type='lower', + swap='kpt-15'), + 18: + dict( + name='kpt-18', + id=18, + color=[255, 0, 0], + type='lower', + swap='kpt-14'), + 19: + dict( + name='kpt-19', + id=19, + color=[255, 0, 0], + type='lower', + swap='kpt-13'), + 20: + dict( + name='kpt-20', + id=20, + color=[255, 0, 0], + type='lower', + swap='kpt-12'), + 21: + dict( + name='kpt-21', + id=21, + color=[255, 0, 0], + type='lower', + swap='kpt-11'), + 22: + dict( + name='kpt-22', + id=22, + color=[255, 0, 0], + type='lower', + swap='kpt-10'), + 23: + dict( + name='kpt-23', + id=23, + color=[255, 0, 0], + type='lower', + swap='kpt-9'), + 24: + dict( + name='kpt-24', + id=24, + color=[255, 0, 0], + type='lower', + swap='kpt-8'), + 25: + dict( + name='kpt-25', + id=25, + color=[255, 0, 0], + type='lower', + swap='kpt-7'), + 26: + dict( + name='kpt-26', + id=26, + color=[255, 0, 0], + type='lower', + swap='kpt-6'), + 27: + dict( + name='kpt-27', + id=27, + color=[255, 0, 0], + type='lower', + swap='kpt-5'), + 28: + dict( + name='kpt-28', + id=28, + color=[255, 0, 0], + type='lower', + swap='kpt-4'), + 29: + dict( + name='kpt-29', + id=29, + color=[255, 0, 0], + type='lower', + swap='kpt-3'), + 30: + dict( + name='kpt-30', + id=30, + color=[255, 0, 0], + type='upper', + swap='kpt-2'), + 31: + dict( + name='kpt-31', + id=31, + color=[255, 0, 0], + type='upper', + swap='kpt-1'), + 32: + dict( + name='kpt-32', + id=32, + color=[255, 0, 0], + type='upper', + swap='kpt-0'), + 33: + dict( + name='kpt-33', + id=33, + color=[255, 0, 0], + type='upper', + swap='kpt-46'), + 34: + dict( + name='kpt-34', + id=34, + color=[255, 0, 0], + type='upper', + swap='kpt-45'), + 35: + dict( + name='kpt-35', + id=35, + color=[255, 0, 0], + type='upper', + swap='kpt-44'), + 36: + dict( + name='kpt-36', + id=36, + 
color=[255, 0, 0], + type='upper', + swap='kpt-43'), + 37: + dict( + name='kpt-37', + id=37, + color=[255, 0, 0], + type='upper', + swap='kpt-42'), + 38: + dict( + name='kpt-38', + id=38, + color=[255, 0, 0], + type='upper', + swap='kpt-50'), + 39: + dict( + name='kpt-39', + id=39, + color=[255, 0, 0], + type='upper', + swap='kpt-49'), + 40: + dict( + name='kpt-40', + id=40, + color=[255, 0, 0], + type='upper', + swap='kpt-48'), + 41: + dict( + name='kpt-41', + id=41, + color=[255, 0, 0], + type='upper', + swap='kpt-47'), + 42: + dict( + name='kpt-42', + id=42, + color=[255, 0, 0], + type='upper', + swap='kpt-37'), + 43: + dict( + name='kpt-43', + id=43, + color=[255, 0, 0], + type='upper', + swap='kpt-36'), + 44: + dict( + name='kpt-44', + id=44, + color=[255, 0, 0], + type='upper', + swap='kpt-35'), + 45: + dict( + name='kpt-45', + id=45, + color=[255, 0, 0], + type='upper', + swap='kpt-34'), + 46: + dict( + name='kpt-46', + id=46, + color=[255, 0, 0], + type='upper', + swap='kpt-33'), + 47: + dict( + name='kpt-47', + id=47, + color=[255, 0, 0], + type='upper', + swap='kpt-41'), + 48: + dict( + name='kpt-48', + id=48, + color=[255, 0, 0], + type='upper', + swap='kpt-40'), + 49: + dict( + name='kpt-49', + id=49, + color=[255, 0, 0], + type='upper', + swap='kpt-39'), + 50: + dict( + name='kpt-50', + id=50, + color=[255, 0, 0], + type='upper', + swap='kpt-38'), + 51: + dict(name='kpt-51', id=51, color=[255, 0, 0], type='upper', swap=''), + 52: + dict(name='kpt-52', id=52, color=[255, 0, 0], type='upper', swap=''), + 53: + dict(name='kpt-53', id=53, color=[255, 0, 0], type='lower', swap=''), + 54: + dict(name='kpt-54', id=54, color=[255, 0, 0], type='lower', swap=''), + 55: + dict( + name='kpt-55', + id=55, + color=[255, 0, 0], + type='upper', + swap='kpt-65'), + 56: + dict( + name='kpt-56', + id=56, + color=[255, 0, 0], + type='lower', + swap='kpt-64'), + 57: + dict( + name='kpt-57', + id=57, + color=[255, 0, 0], + type='lower', + swap='kpt-63'), + 58: + dict( + 
name='kpt-58', + id=58, + color=[255, 0, 0], + type='lower', + swap='kpt-62'), + 59: + dict( + name='kpt-59', + id=59, + color=[255, 0, 0], + type='lower', + swap='kpt-61'), + 60: + dict(name='kpt-60', id=60, color=[255, 0, 0], type='lower', swap=''), + 61: + dict( + name='kpt-61', + id=61, + color=[255, 0, 0], + type='lower', + swap='kpt-59'), + 62: + dict( + name='kpt-62', + id=62, + color=[255, 0, 0], + type='lower', + swap='kpt-58'), + 63: + dict( + name='kpt-63', + id=63, + color=[255, 0, 0], + type='lower', + swap='kpt-57'), + 64: + dict( + name='kpt-64', + id=64, + color=[255, 0, 0], + type='lower', + swap='kpt-56'), + 65: + dict( + name='kpt-65', + id=65, + color=[255, 0, 0], + type='upper', + swap='kpt-55'), + 66: + dict( + name='kpt-66', + id=66, + color=[255, 0, 0], + type='upper', + swap='kpt-79'), + 67: + dict( + name='kpt-67', + id=67, + color=[255, 0, 0], + type='upper', + swap='kpt-78'), + 68: + dict( + name='kpt-68', + id=68, + color=[255, 0, 0], + type='upper', + swap='kpt-77'), + 69: + dict( + name='kpt-69', + id=69, + color=[255, 0, 0], + type='upper', + swap='kpt-76'), + 70: + dict( + name='kpt-70', + id=70, + color=[255, 0, 0], + type='upper', + swap='kpt-75'), + 71: + dict( + name='kpt-71', + id=71, + color=[255, 0, 0], + type='upper', + swap='kpt-82'), + 72: + dict( + name='kpt-72', + id=72, + color=[255, 0, 0], + type='upper', + swap='kpt-81'), + 73: + dict( + name='kpt-73', + id=73, + color=[255, 0, 0], + type='upper', + swap='kpt-80'), + 74: + dict( + name='kpt-74', + id=74, + color=[255, 0, 0], + type='upper', + swap='kpt-83'), + 75: + dict( + name='kpt-75', + id=75, + color=[255, 0, 0], + type='upper', + swap='kpt-70'), + 76: + dict( + name='kpt-76', + id=76, + color=[255, 0, 0], + type='upper', + swap='kpt-69'), + 77: + dict( + name='kpt-77', + id=77, + color=[255, 0, 0], + type='upper', + swap='kpt-68'), + 78: + dict( + name='kpt-78', + id=78, + color=[255, 0, 0], + type='upper', + swap='kpt-67'), + 79: + dict( + name='kpt-79', + 
id=79, + color=[255, 0, 0], + type='upper', + swap='kpt-66'), + 80: + dict( + name='kpt-80', + id=80, + color=[255, 0, 0], + type='upper', + swap='kpt-73'), + 81: + dict( + name='kpt-81', + id=81, + color=[255, 0, 0], + type='upper', + swap='kpt-72'), + 82: + dict( + name='kpt-82', + id=82, + color=[255, 0, 0], + type='upper', + swap='kpt-71'), + 83: + dict( + name='kpt-83', + id=83, + color=[255, 0, 0], + type='upper', + swap='kpt-74'), + 84: + dict( + name='kpt-84', + id=84, + color=[255, 0, 0], + type='lower', + swap='kpt-90'), + 85: + dict( + name='kpt-85', + id=85, + color=[255, 0, 0], + type='lower', + swap='kpt-89'), + 86: + dict( + name='kpt-86', + id=86, + color=[255, 0, 0], + type='lower', + swap='kpt-88'), + 87: + dict(name='kpt-87', id=87, color=[255, 0, 0], type='lower', swap=''), + 88: + dict( + name='kpt-88', + id=88, + color=[255, 0, 0], + type='lower', + swap='kpt-86'), + 89: + dict( + name='kpt-89', + id=89, + color=[255, 0, 0], + type='lower', + swap='kpt-85'), + 90: + dict( + name='kpt-90', + id=90, + color=[255, 0, 0], + type='lower', + swap='kpt-84'), + 91: + dict( + name='kpt-91', + id=91, + color=[255, 0, 0], + type='lower', + swap='kpt-95'), + 92: + dict( + name='kpt-92', + id=92, + color=[255, 0, 0], + type='lower', + swap='kpt-94'), + 93: + dict(name='kpt-93', id=93, color=[255, 0, 0], type='lower', swap=''), + 94: + dict( + name='kpt-94', + id=94, + color=[255, 0, 0], + type='lower', + swap='kpt-92'), + 95: + dict( + name='kpt-95', + id=95, + color=[255, 0, 0], + type='lower', + swap='kpt-91'), + 96: + dict( + name='kpt-96', + id=96, + color=[255, 0, 0], + type='lower', + swap='kpt-100'), + 97: + dict( + name='kpt-97', + id=97, + color=[255, 0, 0], + type='lower', + swap='kpt-99'), + 98: + dict(name='kpt-98', id=98, color=[255, 0, 0], type='lower', swap=''), + 99: + dict( + name='kpt-99', + id=99, + color=[255, 0, 0], + type='lower', + swap='kpt-97'), + 100: + dict( + name='kpt-100', + id=100, + color=[255, 0, 0], + type='lower', + 
swap='kpt-96'), + 101: + dict( + name='kpt-101', + id=101, + color=[255, 0, 0], + type='lower', + swap='kpt-103'), + 102: + dict(name='kpt-102', id=102, color=[255, 0, 0], type='lower', swap=''), + 103: + dict( + name='kpt-103', + id=103, + color=[255, 0, 0], + type='lower', + swap='kpt-101'), + 104: + dict( + name='kpt-104', + id=104, + color=[255, 0, 0], + type='upper', + swap='kpt-105'), + 105: + dict( + name='kpt-105', + id=105, + color=[255, 0, 0], + type='upper', + swap='kpt-104') + }, + skeleton_info={}, + joint_weights=[ + 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, + 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, + 0.8, 0.8, 0.8, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, + 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, + 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, + 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.0, 1.0 + ], + sigmas=[]) diff --git a/configs/face_2d_keypoint/rtmpose/README.md b/configs/face_2d_keypoint/rtmpose/README.md index d309696bed..5381e966f6 100644 --- a/configs/face_2d_keypoint/rtmpose/README.md +++ b/configs/face_2d_keypoint/rtmpose/README.md @@ -22,3 +22,11 @@ Results on WFLW dataset | Model | Input Size | NME | Details and Download | | :-------: | :--------: | :--: | :---------------------------------------: | | RTMPose-m | 256x256 | 4.01 | [rtmpose_wflw.md](./wflw/rtmpose_wflw.md) | + +### LaPa Dataset + +Results on LaPa dataset + +| Model | Input Size | NME | Details and Download | +| :-------: | :--------: | :--: | :---------------------------------------: | +| RTMPose-m | 256x256 | 1.29 | [rtmpose_lapa.md](./lapa/rtmpose_lapa.md) | diff --git a/configs/face_2d_keypoint/rtmpose/lapa/rtmpose-m_8xb64-120e_lapa-256x256.py 
b/configs/face_2d_keypoint/rtmpose/lapa/rtmpose-m_8xb64-120e_lapa-256x256.py new file mode 100644 index 0000000000..97b7104e9a --- /dev/null +++ b/configs/face_2d_keypoint/rtmpose/lapa/rtmpose-m_8xb64-120e_lapa-256x256.py @@ -0,0 +1,247 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 120 +stage2_num_epochs = 10 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=1) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 60 to 120 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(256, 256), + sigma=(5.66, 5.66), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.67, + widen_factor=0.75, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=768, + 
out_channels=106, + input_size=codec['input_size'], + in_featuremap_size=(8, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0., + drop_path=0., + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True, )) + +# base dataset settings +dataset_type = 'LapaDataset' +data_mode = 'topdown' +data_root = 'data/LaPa/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/pose/LaPa/', +# f'{data_root}': 's3://openmmlab/datasets/pose/LaPa/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.5, 1.5], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict(type='PhotometricDistortion'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.2), + dict(type='MedianBlur', p=0.2), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.0), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + # dict(type='RandomHalfBody'), + dict( + 
type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/lapa_train.json', + data_prefix=dict(img='train/images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/lapa_val.json', + data_prefix=dict(img='val/images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = dict( + batch_size=32, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/lapa_test.json', + data_prefix=dict(img='test/images/'), + test_mode=True, + pipeline=val_pipeline, + )) + +# hooks +default_hooks = dict( + checkpoint=dict( + save_best='NME', rule='less', max_keep_ckpts=1, interval=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - 
stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='NME', + norm_mode='keypoint_distance', +) +test_evaluator = val_evaluator diff --git a/configs/face_2d_keypoint/rtmpose/lapa/rtmpose_lapa.md b/configs/face_2d_keypoint/rtmpose/lapa/rtmpose_lapa.md new file mode 100644 index 0000000000..62a3f25157 --- /dev/null +++ b/configs/face_2d_keypoint/rtmpose/lapa/rtmpose_lapa.md @@ -0,0 +1,40 @@ + + +
+RTMDet (ArXiv 2022) + +```bibtex +@misc{lyu2022rtmdet, + title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors}, + author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen}, + year={2022}, + eprint={2212.07784}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +
+ + + +
+LaPa (AAAI'2020) + +```bibtex +@inproceedings{liu2020new, + title={A New Dataset and Boundary-Attention Semantic Segmentation for Face Parsing.}, + author={Liu, Yinglu and Shi, Hailin and Shen, Hao and Si, Yue and Wang, Xiaobo and Mei, Tao}, + booktitle={AAAI}, + pages={11637--11644}, + year={2020} +} +``` + +
+ +Results on LaPa val set + +| Arch | Input Size | NME | ckpt | log | +| :------------------------------------------------------------- | :--------: | :--: | :------------------------------------------------------------: | :------------------------------------------------------------: | +| [pose_rtmpose_m](/configs/face_2d_keypoint/rtmpose/lapa/rtmpose-m_8xb64-120e_lapa-256x256.py) | 256x256 | 1.29 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-lapa_pt-aic-coco_120e-256x256-762b1ae2_20230422.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-lapa_pt-aic-coco_120e-256x256-762b1ae2_20230422.json) | diff --git a/configs/face_2d_keypoint/rtmpose/lapa/rtmpose_lapa.yml b/configs/face_2d_keypoint/rtmpose/lapa/rtmpose_lapa.yml new file mode 100644 index 0000000000..96acff8de6 --- /dev/null +++ b/configs/face_2d_keypoint/rtmpose/lapa/rtmpose_lapa.yml @@ -0,0 +1,15 @@ +Models: +- Config: configs/face_2d_keypoint/rtmpose/lapa/rtmpose-m_8xb64-120e_lapa-256x256.py + In Collection: RTMPose + Alias: face + Metadata: + Architecture: + - RTMPose + Training Data: LaPa + Name: rtmpose-m_8xb64-120e_lapa-256x256 + Results: + - Dataset: LaPa + Metrics: + NME: 1.29 + Task: Face 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-lapa_pt-aic-coco_120e-256x256-762b1ae2_20230422.pth diff --git a/demo/topdown_demo_with_mmdet.py b/demo/topdown_demo_with_mmdet.py index c2e3d8d714..cd001e8db6 100644 --- a/demo/topdown_demo_with_mmdet.py +++ b/demo/topdown_demo_with_mmdet.py @@ -214,7 +214,8 @@ def main(): if output_file: img_vis = visualizer.get_image() - mmcv.imwrite(mmcv.rgb2bgr(img_vis), output_file) + if args.show: + mmcv.imwrite(mmcv.rgb2bgr(img_vis), output_file) elif input_type in ['webcam', 'video']: from mmpose.visualization import FastVisualizer diff --git a/docs/en/dataset_zoo/2d_face_keypoint.md b/docs/en/dataset_zoo/2d_face_keypoint.md index 
17eb823954..1fe40273db 100644 --- a/docs/en/dataset_zoo/2d_face_keypoint.md +++ b/docs/en/dataset_zoo/2d_face_keypoint.md @@ -10,6 +10,7 @@ MMPose supported datasets: - [AFLW](#aflw-dataset) \[ [Homepage](https://www.tugraz.at/institute/icg/research/team-bischof/lrs/downloads/aflw/) \] - [COFW](#cofw-dataset) \[ [Homepage](http://www.vision.caltech.edu/xpburgos/ICCV13/) \] - [COCO-WholeBody-Face](#coco-wholebody-face) \[ [Homepage](https://github.com/jin-s13/COCO-WholeBody/) \] +- [LaPa](#lapa) \[ [Homepage](https://github.com/JDAI-CV/lapa-dataset) \] ## 300W Dataset @@ -325,3 +326,58 @@ mmpose Please also install the latest version of [Extended COCO API](https://github.com/jin-s13/xtcocoapi) to support COCO-WholeBody evaluation: `pip install xtcocotools` + +## LaPa + + + +
+LaPa (AAAI'2020) + +```bibtex +@inproceedings{liu2020new, + title={A New Dataset and Boundary-Attention Semantic Segmentation for Face Parsing.}, + author={Liu, Yinglu and Shi, Hailin and Shen, Hao and Si, Yue and Wang, Xiaobo and Mei, Tao}, + booktitle={AAAI}, + pages={11637--11644}, + year={2020} +} +``` + +
+ +
+ +
+ +For [LaPa](https://github.com/JDAI-CV/lapa-dataset) dataset, images can be downloaded from [their github page](https://github.com/JDAI-CV/lapa-dataset). + +Download and extract them under $MMPOSE/data, and use our `tools/dataset_converters/lapa2coco.py` to make them look like this: + +```text +mmpose +├── mmpose +├── docs +├── tests +├── tools +├── configs +`── data + │── LaPa + │-- annotations + │ │-- lapa_train.json + │ |-- lapa_val.json + │ |-- lapa_test.json + │-- train + │ │-- images + │ │-- labels + │ │-- landmarks + │-- val + │ │-- images + │ │-- labels + │ │-- landmarks + `-- test + │ │-- images + │ │-- labels + │ │-- landmarks + +``` diff --git a/mmpose/datasets/datasets/face/__init__.py b/mmpose/datasets/datasets/face/__init__.py index e0a725cd0e..700cb605f7 100644 --- a/mmpose/datasets/datasets/face/__init__.py +++ b/mmpose/datasets/datasets/face/__init__.py @@ -3,9 +3,10 @@ from .coco_wholebody_face_dataset import CocoWholeBodyFaceDataset from .cofw_dataset import COFWDataset from .face_300w_dataset import Face300WDataset +from .lapa_dataset import LapaDataset from .wflw_dataset import WFLWDataset __all__ = [ 'Face300WDataset', 'WFLWDataset', 'AFLWDataset', 'COFWDataset', - 'CocoWholeBodyFaceDataset' + 'CocoWholeBodyFaceDataset', 'LapaDataset' ] diff --git a/mmpose/datasets/datasets/face/lapa_dataset.py b/mmpose/datasets/datasets/face/lapa_dataset.py new file mode 100644 index 0000000000..1a5bdc4ec0 --- /dev/null +++ b/mmpose/datasets/datasets/face/lapa_dataset.py @@ -0,0 +1,54 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from mmpose.registry import DATASETS +from ..base import BaseCocoStyleDataset + + +@DATASETS.register_module() +class LapaDataset(BaseCocoStyleDataset): + """LaPa dataset for face keypoint localization. + + "A New Dataset and Boundary-Attention Semantic Segmentation + for Face Parsing", AAAI'2020. + + The landmark annotations follow the 106 points mark-up. 
The definition + can be found in `https://github.com/JDAI-CV/lapa-dataset/`__ . + + Args: + ann_file (str): Annotation file path. Default: ''. + bbox_file (str, optional): Detection result file path. If + ``bbox_file`` is set, detected bboxes loaded from this file will + be used instead of ground-truth bboxes. This setting is only for + evaluation, i.e., ignored when ``test_mode`` is ``False``. + Default: ``None``. + data_mode (str): Specifies the mode of data samples: ``'topdown'`` or + ``'bottomup'``. In ``'topdown'`` mode, each data sample contains + one instance; while in ``'bottomup'`` mode, each data sample + contains all instances in an image. Default: ``'topdown'`` + metainfo (dict, optional): Meta information for dataset, such as class + information. Default: ``None``. + data_root (str, optional): The root directory for ``data_prefix`` and + ``ann_file``. Default: ``None``. + data_prefix (dict, optional): Prefix for training data. Default: + ``dict(img=None, ann=None)``. + filter_cfg (dict, optional): Config for filtering data. Default: ``None``. + indices (int or Sequence[int], optional): Support using first few + data in annotation file to facilitate training/testing on a smaller + dataset. Default: ``None`` which means using all ``data_infos``. + serialize_data (bool, optional): Whether to hold memory using + serialized objects, when enabled, data loader workers can use + shared RAM from master process instead of making a copy. + Default: ``True``. + pipeline (list, optional): Processing pipeline. Default: []. + test_mode (bool, optional): ``test_mode=True`` means in test phase. + Default: ``False``. + lazy_init (bool, optional): Whether to load annotation during + instantiation. In some cases, such as visualization, only the meta + information of the dataset is needed, which is not necessary to + load annotation file. ``BaseDataset`` can skip loading annotations + to save time by setting ``lazy_init=True``. Default: ``False``. 
+ max_refetch (int, optional): If ``BaseDataset.prepare_data`` gets a + None img, the maximum extra number of cycles to get a valid + image. Default: 1000. + """ + + METAINFO: dict = dict(from_file='configs/_base_/datasets/lapa.py') diff --git a/mmpose/datasets/transforms/converting.py b/mmpose/datasets/transforms/converting.py index 0730808967..38dcea0994 100644 --- a/mmpose/datasets/transforms/converting.py +++ b/mmpose/datasets/transforms/converting.py @@ -1,5 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. -from typing import List, Tuple +from typing import List, Tuple, Union import numpy as np from mmcv.transforms import BaseTransform @@ -25,11 +25,66 @@ class KeypointConverter(BaseTransform): num_keypoints (int): The number of keypoints in target dataset. mapping (list): A list containing mapping indexes. Each element has format (source_index, target_index) + + Example: + >>> import numpy as np + >>> # case 1: 1-to-1 mapping + >>> # (0, 0) means target[0] = source[0] + >>> self = KeypointConverter( + >>> num_keypoints=3, + >>> mapping=[ + >>> (0, 0), (1, 1), (2, 2) + >>> ]) + >>> results = dict( + >>> keypoints=np.arange(12).reshape(2, 3, 2), + >>> keypoints_visible=np.arange(6).reshape(2, 3) % 2) + >>> results = self(results) + >>> assert np.equal(results['keypoints'], + >>> np.arange(12).reshape(2, 3, 2)).all() + >>> assert np.equal(results['keypoints_visible'], + >>> np.arange(6).reshape(2, 3) % 2).all() + >>> + >>> # case 2: 2-to-1 mapping + >>> # ((1, 2), 0) means target[0] = (source[1] + source[2]) / 2 + >>> self = KeypointConverter( + >>> num_keypoints=3, + >>> mapping=[ + >>> ((1, 2), 0), (1, 1), (2, 2) + >>> ]) + >>> results = dict( + >>> keypoints=np.arange(12).reshape(2, 3, 2), + >>> keypoints_visible=np.arange(6).reshape(2, 3) % 2) + >>> results = self(results) """ - def __init__(self, num_keypoints: int, mapping: List[Tuple[int, int]]): + def __init__(self, num_keypoints: int, + mapping: Union[List[Tuple[int, int]], 
List[Tuple[Tuple, + int]]]): self.num_keypoints = num_keypoints self.mapping = mapping + source_index, target_index = zip(*mapping) + + src1, src2 = [], [] + interpolation = False + for x in source_index: + if isinstance(x, (list, tuple)): + assert len(x) == 2, 'source_index should be a list/tuple of ' \ + 'length 2' + src1.append(x[0]) + src2.append(x[1]) + interpolation = True + else: + src1.append(x) + src2.append(x) + + # When paired source_indexes are input, + # keep a self.source_index2 for interpolation + if interpolation: + self.source_index2 = src2 + + self.source_index = src1 + self.target_index = target_index + self.interpolation = interpolation def transform(self, results: dict) -> dict: num_instances = results['keypoints'].shape[0] @@ -37,10 +92,22 @@ def transform(self, results: dict) -> dict: keypoints = np.zeros((num_instances, self.num_keypoints, 2)) keypoints_visible = np.zeros((num_instances, self.num_keypoints)) - source_index, target_index = zip(*self.mapping) - keypoints[:, target_index] = results['keypoints'][:, source_index] - keypoints_visible[:, target_index] = results[ - 'keypoints_visible'][:, source_index] + # When paired source_indexes are input, + # perform interpolation with self.source_index and self.source_index2 + if self.interpolation: + keypoints[:, self.target_index] = 0.5 * ( + results['keypoints'][:, self.source_index] + + results['keypoints'][:, self.source_index2]) + + keypoints_visible[:, self.target_index] = results[ + 'keypoints_visible'][:, self.source_index] * \ + results['keypoints_visible'][:, self.source_index2] + else: + keypoints[:, + self.target_index] = results['keypoints'][:, self. 
+ source_index] + keypoints_visible[:, self.target_index] = results[ + 'keypoints_visible'][:, self.source_index] results['keypoints'] = keypoints results['keypoints_visible'] = keypoints_visible diff --git a/projects/rtmpose/README.md b/projects/rtmpose/README.md index bdb1db13d1..d697af3dcc 100644 --- a/projects/rtmpose/README.md +++ b/projects/rtmpose/README.md @@ -157,14 +157,14 @@ Feel free to join our community group for more help: ### Body 2d (17 Keypoints) -| Config | Input Size | AP
(COCO) | Params(M) | FLOPS(G) | ORT-Latency(ms)
(i7-11700) | TRT-FP16-Latency(ms)
(GTX 1660Ti) | ncnn-FP16-Latency(ms)
(Snapdragon 865) | Logs | Download | -| :---------: | :--------: | :---------------: | :-------: | :------: | :--------------------------------: | :---------------------------------------: | :--------------------------------------------: | :--------: | :------------: | -| [RTMPose-t](./rtmpose/body_2d_keypoint/rtmpose-t_8xb256-420e_coco-256x192.py) | 256x192 | 68.5 | 3.34 | 0.36 | 3.20 | 1.06 | 9.02 | [Log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-tiny_simcc-aic-coco_pt-aic-coco_420e-256x192-cfc8f33d_20230126.json) | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-tiny_simcc-aic-coco_pt-aic-coco_420e-256x192-cfc8f33d_20230126.pth) | -| [RTMPose-s](./rtmpose/body_2d_keypoint/rtmpose-s_8xb256-420e_coco-256x192.py) | 256x192 | 72.2 | 5.47 | 0.68 | 4.48 | 1.39 | 13.89 | [Log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-aic-coco_pt-aic-coco_420e-256x192-fcb2599b_20230126.json) | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-aic-coco_pt-aic-coco_420e-256x192-fcb2599b_20230126.pth) | -| [RTMPose-m](./rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-256x192.py) | 256x192 | 75.8 | 13.59 | 1.93 | 11.06 | 2.29 | 26.44 | [Log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-256x192-63eb25f7_20230126.json) | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-256x192-63eb25f7_20230126.pth) | -| [RTMPose-l](./rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-256x192.py) | 256x192 | 76.5 | 27.66 | 4.16 | 18.85 | 3.46 | 45.37 | [Log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-aic-coco_pt-aic-coco_420e-256x192-f016ffe0_20230126.json) | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-aic-coco_pt-aic-coco_420e-256x192-f016ffe0_20230126.pth) | -| 
[RTMPose-m](./rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py) | 384x288 | 77.0 | 13.72 | 4.33 | 24.78 | 3.66 | - | [Log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-384x288-a62a0b32_20230228.json) | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-384x288-a62a0b32_20230228.pth) | -| [RTMPose-l](./rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-384x288.py) | 384x288 | 77.3 | 27.79 | 9.35 | - | 6.05 | - | [Log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-aic-coco_pt-aic-coco_420e-384x288-97d6cb0f_20230228.json) | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-aic-coco_pt-aic-coco_420e-384x288-97d6cb0f_20230228.pth) | +| Config | Input Size | AP
(COCO) | Params(M) | FLOPS(G) | ORT-Latency(ms)
(i7-11700) | TRT-FP16-Latency(ms)
(GTX 1660Ti) | ncnn-FP16-Latency(ms)
(Snapdragon 865) | Download | +| :--------------: | :--------: | :---------------: | :-------: | :------: | :--------------------------------: | :---------------------------------------: | :--------------------------------------------: | :-----------------: | +| [RTMPose-t](./rtmpose/body_2d_keypoint/rtmpose-t_8xb256-420e_coco-256x192.py) | 256x192 | 68.5 | 3.34 | 0.36 | 3.20 | 1.06 | 9.02 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-tiny_simcc-aic-coco_pt-aic-coco_420e-256x192-cfc8f33d_20230126.pth) | +| [RTMPose-s](./rtmpose/body_2d_keypoint/rtmpose-s_8xb256-420e_coco-256x192.py) | 256x192 | 72.2 | 5.47 | 0.68 | 4.48 | 1.39 | 13.89 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-aic-coco_pt-aic-coco_420e-256x192-fcb2599b_20230126.pth) | +| [RTMPose-m](./rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-256x192.py) | 256x192 | 75.8 | 13.59 | 1.93 | 11.06 | 2.29 | 26.44 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-256x192-63eb25f7_20230126.pth) | +| [RTMPose-l](./rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-256x192.py) | 256x192 | 76.5 | 27.66 | 4.16 | 18.85 | 3.46 | 45.37 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-aic-coco_pt-aic-coco_420e-256x192-f016ffe0_20230126.pth) | +| [RTMPose-m](./rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py) | 384x288 | 77.0 | 13.72 | 4.33 | 24.78 | 3.66 | - | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-384x288-a62a0b32_20230228.pth) | +| [RTMPose-l](./rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-384x288.py) | 384x288 | 77.3 | 27.79 | 9.35 | - | 6.05 | - | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-aic-coco_pt-aic-coco_420e-384x288-97d6cb0f_20230228.pth) | #### Model Pruning @@ -172,29 +172,35 @@ Feel free to join our 
community group for more help: - Model pruning is supported by [MMRazor](https://github.com/open-mmlab/mmrazor) -| Config | Input Size | AP
(COCO) | Params(M) | FLOPS(G) | ORT-Latency(ms)
(i7-11700) | TRT-FP16-Latency(ms)
(GTX 1660Ti) | ncnn-FP16-Latency(ms)
(Snapdragon 865) | Logs | Download | -| :---------: | :--------: | :---------------: | :-------: | :------: | :--------------------------------: | :---------------------------------------: | :--------------------------------------------: | :--------: | :------------: | -| RTMPose-s-aic-coco-pruned | 256x192 | 69.4 | 3.43 | 0.35 | - | - | - | [log](https://download.openmmlab.com/mmrazor/v1/pruning/group_fisher/rtmpose-s/group_fisher_finetune_rtmpose-s_8xb256-420e_aic-coco-256x192.json) | [model](https://download.openmmlab.com/mmrazor/v1/pruning/group_fisher/rtmpose-s/group_fisher_finetune_rtmpose-s_8xb256-420e_aic-coco-256x192.pth) | +| Config | Input Size | AP
(COCO) | Params(M) | FLOPS(G) | ORT-Latency(ms)
(i7-11700) | TRT-FP16-Latency(ms)
(GTX 1660Ti) | ncnn-FP16-Latency(ms)
(Snapdragon 865) | Download | +| :--------------: | :--------: | :---------------: | :-------: | :------: | :--------------------------------: | :---------------------------------------: | :--------------------------------------------: | :-----------------: | +| RTMPose-s-aic-coco-pruned | 256x192 | 69.4 | 3.43 | 0.35 | - | - | - | [Model](https://download.openmmlab.com/mmrazor/v1/pruning/group_fisher/rtmpose-s/group_fisher_finetune_rtmpose-s_8xb256-420e_aic-coco-256x192.pth) | For more details, please refer to [GroupFisher Pruning for RTMPose](./rtmpose/pruning/README.md). ### WholeBody 2d (133 Keypoints) -| Config | Input Size | Whole AP | Whole AR | FLOPS(G) | ORT-Latency(ms)
(i7-11700) | TRT-FP16-Latency(ms)
(GTX 1660Ti) | Logs | Download | -| :----------------------------- | :--------: | :------: | :------: | :------: | :--------------------------------: | :---------------------------------------: | :--------------------------: | :-------------------------------: | -| [RTMPose-m](./rtmpose/wholebody_2d_keypoint/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py) | 256x192 | 60.4 | 66.7 | 2.22 | 13.50 | 4.00 | [Log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco-wholebody_pt-aic-coco_270e-256x192-cd5e845c_20230123.json) | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco-wholebody_pt-aic-coco_270e-256x192-cd5e845c_20230123.pth) | -| [RTMPose-l](./rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py) | 256x192 | 63.2 | 69.4 | 4.52 | 23.41 | 5.67 | [Log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-256x192-6f206314_20230124.json) | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-256x192-6f206314_20230124.pth) | -| [RTMPose-l](./rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py) | 384x288 | 67.0 | 72.3 | 10.07 | 44.58 | 7.68 | [Log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-384x288-eaeb96c8_20230125.json) | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-384x288-eaeb96c8_20230125.pth) | +| Config | Input Size | Whole AP | Whole AR | FLOPS(G) | ORT-Latency(ms)
(i7-11700) | TRT-FP16-Latency(ms)
(GTX 1660Ti) | Download | +| :------------------------------------------- | :--------: | :------: | :------: | :------: | :--------------------------------: | :---------------------------------------: | :---------------------------------------------: | +| [RTMPose-m](./rtmpose/wholebody_2d_keypoint/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py) | 256x192 | 60.4 | 66.7 | 2.22 | 13.50 | 4.00 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco-wholebody_pt-aic-coco_270e-256x192-cd5e845c_20230123.pth) | +| [RTMPose-l](./rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py) | 256x192 | 63.2 | 69.4 | 4.52 | 23.41 | 5.67 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-256x192-6f206314_20230124.pth) | +| [RTMPose-l](./rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py) | 384x288 | 67.0 | 72.3 | 10.07 | 44.58 | 7.68 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-384x288-eaeb96c8_20230125.pth) | ### Animal 2d (17 Keypoints) -| Config | Input Size | AP
(AP10K) | FLOPS(G) | ORT-Latency(ms)
(i7-11700) | TRT-FP16-Latency(ms)
(GTX 1660Ti) | Logs | Download | -| :---------------------------: | :--------: | :----------------: | :------: | :--------------------------------: | :---------------------------------------: | :--------------------------: | :------------------------------: | -| [RTMPose-m](./rtmpose/animal_2d_keypoint/rtmpose-m_8xb64-210e_ap10k-256x256.py) | 256x256 | 72.2 | 2.57 | 14.157 | 2.404 | [Log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-ap10k_pt-aic-coco_210e-256x256-7a041aa1_20230206.json) | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-ap10k_pt-aic-coco_210e-256x256-7a041aa1_20230206.pth) | +| Config | Input Size | AP
(AP10K) | FLOPS(G) | ORT-Latency(ms)
(i7-11700) | TRT-FP16-Latency(ms)
(GTX 1660Ti) | Download | +| :-----------------------------------------: | :--------: | :----------------: | :------: | :--------------------------------: | :---------------------------------------: | :--------------------------------------------: | +| [RTMPose-m](./rtmpose/animal_2d_keypoint/rtmpose-m_8xb64-210e_ap10k-256x256.py) | 256x256 | 72.2 | 2.57 | 14.157 | 2.404 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-ap10k_pt-aic-coco_210e-256x256-7a041aa1_20230206.pth) | -### Face 2d +### Face 2d (106 Keypoints) -Coming soon +
+ +
+ +| Config | Input Size | NME
(LaPa) | FLOPS(G) | ORT-Latency(ms)
(i7-11700) | TRT-FP16-Latency(ms)
(GTX 1660Ti) | Download | +| :----------------------------------------------------------------------------: | :--------: | :----------------: | :------: | :--------------------------------: | :---------------------------------------: | :---------: | +| [RTMPose-m (alpha version)](./rtmpose/face_2d_keypoint/rtmpose-m_8xb64-120e_lapa-256x256.py) | 256x256 | 1.70 | - | - | - | Coming soon | ### Hand 2d diff --git a/projects/rtmpose/README_CN.md b/projects/rtmpose/README_CN.md index 5b7979c3ec..0b25ebece2 100644 --- a/projects/rtmpose/README_CN.md +++ b/projects/rtmpose/README_CN.md @@ -148,14 +148,14 @@ RTMPose 是一个长期优化迭代的项目,致力于业务场景下的高性 ### 人体 2d 关键点 (17 Keypoints) -| Config | Input Size | AP
(COCO) | Params(M) | FLOPS(G) | ORT-Latency(ms)
(i7-11700) | TRT-FP16-Latency(ms)
(GTX 1660Ti) | ncnn-FP16-Latency(ms)
(Snapdragon 865) | Logs | Download | -| :---------: | :--------: | :---------------: | :-------: | :------: | :--------------------------------: | :---------------------------------------: | :--------------------------------------------: | :--------: | :------------: | -| [RTMPose-t](./rtmpose/body_2d_keypoint/rtmpose-t_8xb256-420e_coco-256x192.py) | 256x192 | 68.5 | 3.34 | 0.36 | 3.20 | 1.06 | 9.02 | [Log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-tiny_simcc-aic-coco_pt-aic-coco_420e-256x192-cfc8f33d_20230126.json) | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-tiny_simcc-aic-coco_pt-aic-coco_420e-256x192-cfc8f33d_20230126.pth) | -| [RTMPose-s](./rtmpose/body_2d_keypoint/rtmpose-s_8xb256-420e_coco-256x192.py) | 256x192 | 72.2 | 5.47 | 0.68 | 4.48 | 1.39 | 13.89 | [Log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-aic-coco_pt-aic-coco_420e-256x192-fcb2599b_20230126.json) | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-aic-coco_pt-aic-coco_420e-256x192-fcb2599b_20230126.pth) | -| [RTMPose-m](./rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-256x192.py) | 256x192 | 75.8 | 13.59 | 1.93 | 11.06 | 2.29 | 26.44 | [Log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-256x192-63eb25f7_20230126.json) | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-256x192-63eb25f7_20230126.pth) | -| [RTMPose-l](./rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-256x192.py) | 256x192 | 76.5 | 27.66 | 4.16 | 18.85 | 3.46 | 45.37 | [Log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-aic-coco_pt-aic-coco_420e-256x192-f016ffe0_20230126.json) | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-aic-coco_pt-aic-coco_420e-256x192-f016ffe0_20230126.pth) | -| 
[RTMPose-m](./rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py) | 384x288 | 77.0 | 13.72 | 4.33 | 24.78 | 3.66 | - | [Log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-384x288-a62a0b32_20230228.json) | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-384x288-a62a0b32_20230228.pth) | -| [RTMPose-l](./rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-384x288.py) | 384x288 | 77.3 | 27.79 | 9.35 | - | 6.05 | - | [Log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-aic-coco_pt-aic-coco_420e-384x288-97d6cb0f_20230228.json) | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-aic-coco_pt-aic-coco_420e-384x288-97d6cb0f_20230228.pth) | +| Config | Input Size | AP
(COCO) | Params(M) | FLOPS(G) | ORT-Latency(ms)
(i7-11700) | TRT-FP16-Latency(ms)
(GTX 1660Ti) | ncnn-FP16-Latency(ms)
(Snapdragon 865) | Download | +| :--------------: | :--------: | :---------------: | :-------: | :------: | :--------------------------------: | :---------------------------------------: | :--------------------------------------------: | :-----------------: | +| [RTMPose-t](./rtmpose/body_2d_keypoint/rtmpose-t_8xb256-420e_coco-256x192.py) | 256x192 | 68.5 | 3.34 | 0.36 | 3.20 | 1.06 | 9.02 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-tiny_simcc-aic-coco_pt-aic-coco_420e-256x192-cfc8f33d_20230126.pth) | +| [RTMPose-s](./rtmpose/body_2d_keypoint/rtmpose-s_8xb256-420e_coco-256x192.py) | 256x192 | 72.2 | 5.47 | 0.68 | 4.48 | 1.39 | 13.89 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-aic-coco_pt-aic-coco_420e-256x192-fcb2599b_20230126.pth) | +| [RTMPose-m](./rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-256x192.py) | 256x192 | 75.8 | 13.59 | 1.93 | 11.06 | 2.29 | 26.44 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-256x192-63eb25f7_20230126.pth) | +| [RTMPose-l](./rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-256x192.py) | 256x192 | 76.5 | 27.66 | 4.16 | 18.85 | 3.46 | 45.37 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-aic-coco_pt-aic-coco_420e-256x192-f016ffe0_20230126.pth) | +| [RTMPose-m](./rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py) | 384x288 | 77.0 | 13.72 | 4.33 | 24.78 | 3.66 | - | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-384x288-a62a0b32_20230228.pth) | +| [RTMPose-l](./rtmpose/body_2d_keypoint/rtmpose-l_8xb256-420e_coco-384x288.py) | 384x288 | 77.3 | 27.79 | 9.35 | - | 6.05 | - | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-aic-coco_pt-aic-coco_420e-384x288-97d6cb0f_20230228.pth) | #### 模型剪枝 @@ -163,37 +163,39 @@ RTMPose 是一个长期优化迭代的项目,致力于业务场景下的高性 - 
模型剪枝由 [MMRazor](https://github.com/open-mmlab/mmrazor) 提供 -| Config | Input Size | AP
(COCO) | Params(M) | FLOPS(G) | ORT-Latency(ms)
(i7-11700) | TRT-FP16-Latency(ms)
(GTX 1660Ti) | ncnn-FP16-Latency(ms)
(Snapdragon 865) | Logs | Download | -| :---------: | :--------: | :---------------: | :-------: | :------: | :--------------------------------: | :---------------------------------------: | :--------------------------------------------: | :--------: | :------------: | -| RTMPose-s-aic-coco-pruned | 256x192 | 69.4 | 3.43 | 0.35 | - | - | - | [log](https://download.openmmlab.com/mmrazor/v1/pruning/group_fisher/rtmpose-s/group_fisher_finetune_rtmpose-s_8xb256-420e_aic-coco-256x192.json) | [model](https://download.openmmlab.com/mmrazor/v1/pruning/group_fisher/rtmpose-s/group_fisher_finetune_rtmpose-s_8xb256-420e_aic-coco-256x192.pth) | +| Config | Input Size | AP
(COCO) | Params(M) | FLOPS(G) | ORT-Latency(ms)
(i7-11700) | TRT-FP16-Latency(ms)
(GTX 1660Ti) | ncnn-FP16-Latency(ms)
(Snapdragon 865) | Download | +| :--------------: | :--------: | :---------------: | :-------: | :------: | :--------------------------------: | :---------------------------------------: | :--------------------------------------------: | :-----------------: | +| RTMPose-s-aic-coco-pruned | 256x192 | 69.4 | 3.43 | 0.35 | - | - | - | [Model](https://download.openmmlab.com/mmrazor/v1/pruning/group_fisher/rtmpose-s/group_fisher_finetune_rtmpose-s_8xb256-420e_aic-coco-256x192.pth) | 更多信息,请参考 [GroupFisher Pruning for RTMPose](./rtmpose/pruning/README.md). ### 人体全身 2d 关键点 (133 Keypoints) -| Config | Input Size | Whole AP | Whole AR | FLOPS(G) | ORT-Latency(ms)
(i7-11700) | TRT-FP16-Latency(ms)
(GTX 1660Ti) | Logs | Download | -| :----------------------------- | :--------: | :------: | :------: | :------: | :--------------------------------: | :---------------------------------------: | :--------------------------: | :-------------------------------: | -| [RTMPose-m](./rtmpose/wholebody_2d_keypoint/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py) | 256x192 | 60.4 | 66.7 | 2.22 | 13.50 | 4.00 | [Log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco-wholebody_pt-aic-coco_270e-256x192-cd5e845c_20230123.json) | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco-wholebody_pt-aic-coco_270e-256x192-cd5e845c_20230123.pth) | -| [RTMPose-l](./rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py) | 256x192 | 63.2 | 69.4 | 4.52 | 23.41 | 5.67 | [Log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-256x192-6f206314_20230124.json) | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-256x192-6f206314_20230124.pth) | -| [RTMPose-l](./rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py) | 384x288 | 67.0 | 72.3 | 10.07 | 44.58 | 7.68 | [Log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-384x288-eaeb96c8_20230125.json) | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-384x288-eaeb96c8_20230125.pth) | +| Config | Input Size | Whole AP | Whole AR | FLOPS(G) | ORT-Latency(ms)
(i7-11700) | TRT-FP16-Latency(ms)
(GTX 1660Ti) | Download | +| :------------------------------------------- | :--------: | :------: | :------: | :------: | :--------------------------------: | :---------------------------------------: | :---------------------------------------------: | +| [RTMPose-m](./rtmpose/wholebody_2d_keypoint/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py) | 256x192 | 60.4 | 66.7 | 2.22 | 13.50 | 4.00 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco-wholebody_pt-aic-coco_270e-256x192-cd5e845c_20230123.pth) | +| [RTMPose-l](./rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py) | 256x192 | 63.2 | 69.4 | 4.52 | 23.41 | 5.67 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-256x192-6f206314_20230124.pth) | +| [RTMPose-l](./rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py) | 384x288 | 67.0 | 72.3 | 10.07 | 44.58 | 7.68 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-384x288-eaeb96c8_20230125.pth) | ### 动物 2d 关键点 (17 Keypoints) -| Config | Input Size | AP
(AP10K) | FLOPS(G) | ORT-Latency(ms)
(i7-11700) | TRT-FP16-Latency(ms)
(GTX 1660Ti) | Logs | Download | -| :---------------------------: | :--------: | :----------------: | :------: | :--------------------------------: | :---------------------------------------: | :--------------------------: | :------------------------------: | -| [RTMPose-m](./rtmpose/animal_2d_keypoint/rtmpose-m_8xb64-210e_ap10k-256x256.py) | 256x256 | 72.2 | 2.57 | 14.157 | 2.404 | [Log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-ap10k_pt-aic-coco_210e-256x256-7a041aa1_20230206.json) | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-ap10k_pt-aic-coco_210e-256x256-7a041aa1_20230206.pth) | +| Config | Input Size | AP
(AP10K) | FLOPS(G) | ORT-Latency(ms)
(i7-11700) | TRT-FP16-Latency(ms)
(GTX 1660Ti) | Download | +| :-----------------------------------------: | :--------: | :----------------: | :------: | :--------------------------------: | :---------------------------------------: | :--------------------------------------------: | +| [RTMPose-m](./rtmpose/animal_2d_keypoint/rtmpose-m_8xb64-210e_ap10k-256x256.py) | 256x256 | 72.2 | 2.57 | 14.157 | 2.404 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-ap10k_pt-aic-coco_210e-256x256-7a041aa1_20230206.pth) | -### 脸部 2d 关键点 +### 脸部 2d 关键点 (106 Keypoints) -| Config | Input Size | NME
(COCO-WholeBody-Face) | FLOPS(G) | ORT-Latency(ms)
(i7-11700) | TRT-FP16-Latency(ms)
(GTX 1660Ti) | Logs | Download | -| :--------------------------------------------------: | :--------: | :-------------------------------: | :------: | :--------------------------------: | :---------------------------------------: | :---------: | :---------: | -| [RTMPose-m](./rtmpose/face_2d_keypoint/wflw/rtmpose-m_8xb64-60e_coco-wholebody-face-256x256.py) | 256x256 | 4.57 | - | - | - | Coming soon | Coming soon | +
+ +
+ +| Config | Input Size | NME
(LaPa) | FLOPS(G) | ORT-Latency(ms)
(i7-11700) | TRT-FP16-Latency(ms)
(GTX 1660Ti) | Download | +| :----------------------------------------------------------------------------: | :--------: | :----------------: | :------: | :--------------------------------: | :---------------------------------------: | :---------: | +| [RTMPose-m (alpha version)](./rtmpose/face_2d_keypoint/rtmpose-m_8xb64-120e_lapa-256x256.py) | 256x256 | 1.70 | - | - | - | Coming soon | ### 手部 2d 关键点 -| Config | Input Size | PCK
(COCO-WholeBody-Hand) | FLOPS(G) | ORT-Latency(ms)
(i7-11700) | TRT-FP16-Latency(ms)
(GTX 1660Ti) | Logs | Download | -| :--------------------------------------------------: | :--------: | :-------------------------------: | :------: | :--------------------------------: | :---------------------------------------: | :---------: | :---------: | -| [RTMPose-m](./rtmpose/hand_2d_keypoint/coco_wholebody_hand/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py) | 256x256 | 81.5 | - | - | - | Coming soon | Coming soon | +Coming soon ### 预训练模型 diff --git a/projects/rtmpose/rtmpose/face_2d_keypoint/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py b/projects/rtmpose/rtmpose/face_2d_keypoint/rtmpose-m_8xb64-120e_lapa-256x256.py similarity index 84% rename from projects/rtmpose/rtmpose/face_2d_keypoint/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py rename to projects/rtmpose/rtmpose/face_2d_keypoint/rtmpose-m_8xb64-120e_lapa-256x256.py index d331dac91a..309414d371 100644 --- a/projects/rtmpose/rtmpose/face_2d_keypoint/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py +++ b/projects/rtmpose/rtmpose/face_2d_keypoint/rtmpose-m_8xb64-120e_lapa-256x256.py @@ -1,7 +1,7 @@ _base_ = ['mmpose::_base_/default_runtime.py'] # runtime -max_epochs = 60 +max_epochs = 120 stage2_num_epochs = 10 base_lr = 4e-3 @@ -74,7 +74,7 @@ head=dict( type='RTMCCHead', in_channels=768, - out_channels=68, + out_channels=106, input_size=codec['input_size'], in_featuremap_size=(8, 8), simcc_split_ratio=codec['simcc_split_ratio'], @@ -97,16 +97,16 @@ test_cfg=dict(flip_test=True, )) # base dataset settings -dataset_type = 'CocoWholeBodyFaceDataset' +dataset_type = 'LapaDataset' data_mode = 'topdown' -data_root = 'data/coco/' +data_root = 'data/LaPa/' backend_args = dict(backend='local') # backend_args = dict( # backend='petrel', # path_mapping=dict({ -# f'{data_root}': 's3://openmmlab/datasets/detection/coco/', -# f'{data_root}': 's3://openmmlab/datasets/detection/coco/' +# f'{data_root}': 's3://openmmlab/datasets/pose/LaPa/', +# f'{data_root}': 's3://openmmlab/datasets/pose/LaPa/' # 
})) # pipelines @@ -114,16 +114,17 @@ dict(type='LoadImage', backend_args=backend_args), dict(type='GetBBoxCenterScale'), dict(type='RandomFlip', direction='horizontal'), - # dict(type='RandomHalfBody'), + dict(type='RandomHalfBody'), dict( - type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + type='RandomBBoxTransform', scale_factor=[0.5, 1.5], rotate_factor=80), dict(type='TopdownAffine', input_size=codec['input_size']), dict(type='mmdet.YOLOXHSVRandomAug'), + dict(type='PhotometricDistortion'), dict( type='Albumentation', transforms=[ - dict(type='Blur', p=0.1), - dict(type='MedianBlur', p=0.1), + dict(type='Blur', p=0.2), + dict(type='MedianBlur', p=0.2), dict( type='CoarseDropout', max_holes=1, @@ -185,8 +186,8 @@ type=dataset_type, data_root=data_root, data_mode=data_mode, - ann_file='annotations/coco_wholebody_train_v1.0.json', - data_prefix=dict(img='train2017/'), + ann_file='annotations/lapa_train.json', + data_prefix=dict(img='train/images/'), pipeline=train_pipeline, )) val_dataloader = dict( @@ -199,12 +200,26 @@ type=dataset_type, data_root=data_root, data_mode=data_mode, - ann_file='annotations/coco_wholebody_val_v1.0.json', - data_prefix=dict(img='val2017/'), + ann_file='annotations/lapa_val.json', + data_prefix=dict(img='val/images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = dict( + batch_size=32, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/lapa_test.json', + data_prefix=dict(img='test/images/'), test_mode=True, pipeline=val_pipeline, )) -test_dataloader = val_dataloader # hooks default_hooks = dict( diff --git a/tests/data/lapa/10773046825_0.jpg b/tests/data/lapa/10773046825_0.jpg new file mode 100644 index 0000000000..ebbc0a3bc5 Binary files /dev/null and b/tests/data/lapa/10773046825_0.jpg differ diff 
--git a/tests/data/lapa/13609937564_5.jpg b/tests/data/lapa/13609937564_5.jpg new file mode 100644 index 0000000000..d9c2c08682 Binary files /dev/null and b/tests/data/lapa/13609937564_5.jpg differ diff --git a/tests/data/lapa/test_lapa.json b/tests/data/lapa/test_lapa.json new file mode 100644 index 0000000000..0484f08c06 --- /dev/null +++ b/tests/data/lapa/test_lapa.json @@ -0,0 +1,39 @@ +{ + "categories": [ + { + "supercategory": "person", + "id": 1, + "name": "face", + "keypoints": [], + "skeleton": [] + } + ], + "images": [ + {"id": 40, "file_name": "10773046825_0.jpg", "height": 1494, "width": 1424}, + {"id": 41, "file_name": "13609937564_5.jpg", "height": 496, "width": 486} + ], + "annotations": [ + { + "keypoints": [ + 406.0, 644.0, 2.0, 402.0, 682.0, 2.0, 397.0, 719.0, 2.0, 391.0, 757.0, 2.0, 388.0, 795.0, 2.0, 389.0, 834.0, 2.0, 394.0, 874.0, 2.0, 402.0, 913.0, 2.0, 413.0, 952.0, 2.0, 426.0, 989.0, 2.0, 443.0, 1025.0, 2.0, 461.0, 1059.0, 2.0, 481.0, 1092.0, 2.0, 502.0, 1126.0, 2.0, 527.0, 1156.0, 2.0, 559.0, 1180.0, 2.0, 603.0, 1193.0, 2.0, 658.0, 1195.0, 2.0, 713.0, 1187.0, 2.0, 766.0, 1172.0, 2.0, 816.0, 1151.0, 2.0, 863.0, 1128.0, 2.0, 907.0, 1101.0, 2.0, 945.0, 1067.0, 2.0, 978.0, 1029.0, 2.0, 1003.0, 986.0, 2.0, 1019.0, 938.0, 2.0, 1030.0, 888.0, 2.0, 1037.0, 838.0, 2.0, 1040.0, 788.0, 2.0, 1040.0, 739.0, 2.0, 1037.0, 689.0, 2.0, 1033.0, 640.0, 2.0, 417.0, 595.0, 2.0, 445.0, 559.0, 2.0, 488.0, 548.0, 2.0, 535.0, 558.0, 2.0, 569.0, 579.0, 2.0, 562.0, 604.0, 2.0, 526.0, 588.0, 2.0, 487.0, 579.0, 2.0, 451.0, 581.0, 2.0, 662.0, 566.0, 2.0, 713.0, 545.0, 2.0, 777.0, 541.0, 2.0, 839.0, 558.0, 2.0, 887.0, 600.0, 2.0, 832.0, 581.0, 2.0, 777.0, 572.0, 2.0, 721.0, 578.0, 2.0, 669.0, 593.0, 2.0, 614.0, 654.0, 2.0, 602.0, 704.0, 2.0, 590.0, 755.0, 2.0, 577.0, 807.0, 2.0, 573.0, 678.0, 2.0, 540.0, 778.0, 2.0, 518.0, 826.0, 2.0, 538.0, 846.0, 2.0, 562.0, 855.0, 2.0, 592.0, 866.0, 2.0, 632.0, 856.0, 2.0, 668.0, 848.0, 2.0, 703.0, 827.0, 2.0, 681.0, 778.0, 2.0, 
667.0, 676.0, 2.0, 447.0, 672.0, 2.0, 472.0, 662.0, 2.0, 499.0, 658.0, 2.0, 526.0, 662.0, 2.0, 550.0, 675.0, 2.0, 524.0, 674.0, 2.0, 498.0, 673.0, 2.0, 472.0, 673.0, 2.0, 501.0, 666.0, 2.0, 701.0, 673.0, 2.0, 729.0, 658.0, 2.0, 760.0, 654.0, 2.0, 792.0, 659.0, 2.0, 822.0, 671.0, 2.0, 791.0, 672.0, 2.0, 761.0, 672.0, 2.0, 731.0, 672.0, 2.0, 762.0, 663.0, 2.0, 503.0, 940.0, 2.0, 532.0, 923.0, 2.0, 575.0, 921.0, 2.0, 602.0, 927.0, 2.0, 631.0, 922.0, 2.0, 704.0, 930.0, 2.0, 775.0, 951.0, 2.0, 735.0, 1001.0, 2.0, 680.0, 1032.0, 2.0, 608.0, 1040.0, 2.0, 553.0, 1023.0, 2.0, 522.0, 987.0, 2.0, 519.0, 945.0, 2.0, 549.0, 937.0, 2.0, 604.0, 944.0, 2.0, 687.0, 942.0, 2.0, 751.0, 955.0, 2.0, 700.0, 996.0, 2.0, 609.0, 1007.0, 2.0, 546.0, 987.0, 2.0, 501.0, 666.0, 2.0, 762.0, 663.0, 2.0], + "image_id": 40, + "id": 40, + "num_keypoints": 106, + "bbox": [388.0, 541.0, 652.0, 654.0], + "iscrowd": 0, + "area": 426408, + "category_id": 1 + }, + { + "keypoints": [ + 179.0, 213.0, 2.0, 176.0, 225.0, 2.0, 173.0, 237.0, 2.0, 170.0, 249.0, 2.0, 167.0, 261.0, 2.0, 166.0, 273.0, 2.0, 165.0, 286.0, 2.0, 166.0, 299.0, 2.0, 170.0, 311.0, 2.0, 176.0, 322.0, 2.0, 184.0, 331.0, 2.0, 194.0, 340.0, 2.0, 206.0, 347.0, 2.0, 218.0, 353.0, 2.0, 231.0, 358.0, 2.0, 244.0, 362.0, 2.0, 258.0, 365.0, 2.0, 269.0, 364.0, 2.0, 278.0, 361.0, 2.0, 286.0, 355.0, 2.0, 293.0, 349.0, 2.0, 300.0, 342.0, 2.0, 306.0, 334.0, 2.0, 311.0, 326.0, 2.0, 315.0, 317.0, 2.0, 318.0, 307.0, 2.0, 321.0, 298.0, 2.0, 323.0, 288.0, 2.0, 323.0, 279.0, 2.0, 323.0, 269.0, 2.0, 322.0, 260.0, 2.0, 321.0, 251.0, 2.0, 322.0, 242.0, 2.0, 207.0, 214.0, 2.0, 220.0, 206.0, 2.0, 236.0, 204.0, 2.0, 253.0, 208.0, 2.0, 266.0, 214.0, 2.0, 263.0, 221.0, 2.0, 250.0, 216.0, 2.0, 235.0, 212.0, 2.0, 221.0, 212.0, 2.0, 293.0, 223.0, 2.0, 302.0, 221.0, 2.0, 313.0, 221.0, 2.0, 321.0, 225.0, 2.0, 325.0, 233.0, 2.0, 318.0, 230.0, 2.0, 311.0, 228.0, 2.0, 302.0, 227.0, 2.0, 293.0, 228.0, 2.0, 277.0, 234.0, 2.0, 280.0, 244.0, 2.0, 283.0, 254.0, 2.0, 285.0, 265.0, 
2.0, 261.0, 238.0, 2.0, 256.0, 257.0, 2.0, 248.0, 269.0, 2.0, 256.0, 275.0, 2.0, 266.0, 278.0, 2.0, 275.0, 282.0, 2.0, 282.0, 281.0, 2.0, 288.0, 281.0, 2.0, 293.0, 277.0, 2.0, 291.0, 263.0, 2.0, 285.0, 243.0, 2.0, 220.0, 228.0, 2.0, 228.0, 224.0, 2.0, 237.0, 224.0, 2.0, 245.0, 228.0, 2.0, 251.0, 235.0, 2.0, 243.0, 234.0, 2.0, 234.0, 234.0, 2.0, 226.0, 231.0, 2.0, 232.0, 228.0, 2.0, 287.0, 242.0, 2.0, 293.0, 238.0, 2.0, 301.0, 237.0, 2.0, 307.0, 241.0, 2.0, 311.0, 246.0, 2.0, 306.0, 247.0, 2.0, 299.0, 246.0, 2.0, 293.0, 245.0, 2.0, 297.0, 241.0, 2.0, 222.0, 299.0, 2.0, 242.0, 293.0, 2.0, 263.0, 292.0, 2.0, 271.0, 295.0, 2.0, 279.0, 295.0, 2.0, 288.0, 302.0, 2.0, 292.0, 310.0, 2.0, 286.0, 318.0, 2.0, 277.0, 324.0, 2.0, 263.0, 325.0, 2.0, 246.0, 320.0, 2.0, 233.0, 310.0, 2.0, 229.0, 300.0, 2.0, 246.0, 298.0, 2.0, 269.0, 302.0, 2.0, 282.0, 305.0, 2.0, 289.0, 310.0, 2.0, 280.0, 313.0, 2.0, 265.0, 313.0, 2.0, 243.0, 307.0, 2.0, 232.0, 228.0, 2.0, 297.0, 241.0, 2.0], + "image_id": 41, + "id": 41, + "num_keypoints": 106, + "bbox": [165.0, 204.0, 160.0, 161.0], + "iscrowd": 0, + "area": 25760, + "category_id": 1 + } + ] +} diff --git a/tests/test_datasets/test_datasets/test_face_datasets/test_lapa_dataset.py b/tests/test_datasets/test_datasets/test_face_datasets/test_lapa_dataset.py new file mode 100644 index 0000000000..991f285476 --- /dev/null +++ b/tests/test_datasets/test_datasets/test_face_datasets/test_lapa_dataset.py @@ -0,0 +1,93 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
class TestLaPaDataset(TestCase):
    """Unit tests for ``LapaDataset`` on the ``tests/data/lapa`` fixture."""

    def build_lapa_dataset(self, **kwargs):
        """Build a ``LapaDataset`` from the test fixture.

        Args:
            **kwargs: Overrides for the default dataset config below.

        Returns:
            LapaDataset: The dataset built from the merged config.
        """
        cfg = dict(
            ann_file='test_lapa.json',
            bbox_file=None,
            data_mode='topdown',
            data_root='tests/data/lapa',
            pipeline=[],
            test_mode=False)

        cfg.update(kwargs)
        return LapaDataset(**cfg)

    def check_data_info_keys(self,
                             data_info: dict,
                             data_mode: str = 'topdown'):
        """Assert that ``data_info`` has the expected keys and value types.

        Args:
            data_info (dict): One sample returned by the dataset.
            data_mode (str): Only ``'topdown'`` is supported.

        Raises:
            ValueError: If ``data_mode`` is not ``'topdown'``.
        """
        if data_mode != 'topdown':
            raise ValueError(f'Invalid data_mode {data_mode}')

        expected_keys = dict(
            img_id=int,
            img_path=str,
            bbox=np.ndarray,
            bbox_score=np.ndarray,
            keypoints=np.ndarray,
            keypoints_visible=np.ndarray,
            id=int)

        for key, type_ in expected_keys.items():
            self.assertIn(key, data_info)
            self.assertIsInstance(data_info[key], type_, key)

    def check_metainfo_keys(self, metainfo: dict):
        """Assert that ``metainfo`` has the expected keys and value types.

        Args:
            metainfo (dict): The ``metainfo`` of a built dataset.
        """
        expected_keys = dict(
            dataset_name=str,
            num_keypoints=int,
            keypoint_id2name=dict,
            keypoint_name2id=dict,
            upper_body_ids=list,
            lower_body_ids=list,
            flip_indices=list,
            flip_pairs=list,
            keypoint_colors=np.ndarray,
            num_skeleton_links=int,
            skeleton_links=list,
            skeleton_link_colors=np.ndarray,
            dataset_keypoint_weights=np.ndarray)

        for key, type_ in expected_keys.items():
            self.assertIn(key, metainfo)
            self.assertIsInstance(metainfo[key], type_, key)

    def test_metainfo(self):
        """Check dataset name and per-keypoint metainfo sizes."""
        dataset = self.build_lapa_dataset()
        metainfo = dataset.metainfo
        self.check_metainfo_keys(metainfo)

        # test dataset_name
        self.assertEqual(metainfo['dataset_name'], 'lapa')

        # test number of keypoints
        num_keypoints = 106
        self.assertEqual(metainfo['num_keypoints'], num_keypoints)
        self.assertEqual(len(metainfo['keypoint_colors']), num_keypoints)
        self.assertEqual(
            len(metainfo['dataset_keypoint_weights']), num_keypoints)
        # this test expects the ``sigmas`` entry to be empty
        self.assertEqual(len(metainfo['sigmas']), 0)

    def test_topdown(self):
        """Check topdown loading in training and testing mode."""
        # test_mode=False is topdown training, test_mode=True is topdown
        # testing; both are expected to load the same instances.
        for test_mode in (False, True):
            with self.subTest(test_mode=test_mode):
                dataset = self.build_lapa_dataset(
                    data_mode='topdown', test_mode=test_mode)
                self.assertEqual(dataset.data_mode, 'topdown')
                self.assertIsNone(dataset.bbox_file)
                # NOTE(review): 2 is presumably the number of instances in
                # tests/data/lapa/test_lapa.json - confirm against fixture
                self.assertEqual(len(dataset), 2)
                self.check_data_info_keys(dataset[0])
source_index, target_index in mapping: + if isinstance(source_index, tuple): + source_index, source_index2 = source_index + self.assertTrue( + (results['keypoints'][:, target_index] == 0.5 * + (self.data_info['keypoints'][:, source_index] + + self.data_info['keypoints'][:, source_index2])).all()) + self.assertTrue( + (results['keypoints_visible'][:, target_index] == + self.data_info['keypoints_visible'][:, source_index] * + self.data_info['keypoints_visible'][:, + source_index2]).all()) + else: + self.assertTrue( + (results['keypoints'][:, target_index] == + self.data_info['keypoints'][:, source_index]).all()) + self.assertTrue( + (results['keypoints_visible'][:, target_index] == + self.data_info['keypoints_visible'][:, + source_index]).all()) diff --git a/tools/dataset_converters/lapa2coco.py b/tools/dataset_converters/lapa2coco.py new file mode 100644 index 0000000000..7727bdf022 --- /dev/null +++ b/tools/dataset_converters/lapa2coco.py @@ -0,0 +1,104 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
def default_dump(obj):
    """Convert numpy objects to JSON serializable Python objects.

    Meant to be passed as the ``default`` hook of ``json.dump``.

    Args:
        obj: The object the JSON encoder could not serialize by itself.

    Returns:
        A Python scalar for numpy integer/floating/bool scalars, or a
        (nested) list for ``np.ndarray``.

    Raises:
        TypeError: If ``obj`` is neither a numpy scalar nor an ndarray.
    """
    if isinstance(obj, (np.integer, np.floating, np.bool_)):
        return obj.item()
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    # Returning ``obj`` unchanged would make the encoder call this hook on
    # the same object again and fail with a misleading "Circular reference
    # detected" error, so report the real problem instead.
    raise TypeError(
        f'Object of type {type(obj).__name__} is not JSON serializable')


def convert_labpa_to_coco(ann_dir, out_file):
    """Convert one LaPa split to a COCO-style keypoint annotation file.

    Every file in ``<ann_dir>/landmarks`` is paired with the image of the
    same stem in ``<ann_dir>/images`` (``.jpg``). Each pair produces one
    image entry and one annotation whose bbox is the tight box around the
    landmarks.

    Args:
        ann_dir (str): Root of the split, holding the ``landmarks`` and
            ``images`` sub-directories.
        out_file (str): Path of the JSON file to write.

    Raises:
        FileNotFoundError: If an image cannot be read.
        ValueError: If a landmark file contains no keypoints or a line
            does not hold exactly two numbers.
    """
    landmark_dir = osp.join(ann_dir, 'landmarks')
    img_dir = osp.join(ann_dir, 'images')
    # ``os.listdir`` returns entries in arbitrary order; sort them so the
    # generated ids are reproducible across runs and machines.
    ann_list = sorted(os.listdir(landmark_dir))

    annotations = []
    images = []
    # ids start at 1 and are shared by the image and its annotation
    for cnt, ann_file in enumerate(
            mmengine.track_iter_progress(ann_list), start=1):
        ann_path = osp.join(landmark_dir, ann_file)
        file_name = osp.splitext(ann_file)[0] + '.jpg'
        img_path = osp.join(img_dir, file_name)

        with open(ann_path) as f:
            data_info = f.read().splitlines()

        # ``cv2.imread`` signals failure by returning None, not by raising
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError(f'Failed to read image: {img_path}')

        keypoints = []
        # the first line is a header, not a coordinate pair (presumably
        # the number of landmarks)
        for line in data_info[1:]:
            fields = line.split()
            if not fields:
                # tolerate blank / trailing empty lines
                continue
            x, y = map(float, fields)
            # all landmarks are marked as labelled and visible (v=2)
            keypoints.append([x, y, 2])
        if not keypoints:
            raise ValueError(f'No keypoints found in {ann_path}')
        keypoints = np.array(keypoints)

        x1, y1, _ = np.amin(keypoints, axis=0)
        x2, y2, _ = np.amax(keypoints, axis=0)
        w, h = x2 - x1, y2 - y1
        # bbox in COCO format: [x, y, width, height]
        bbox = [float(x1), float(y1), float(w), float(h)]

        images.append({
            'id': cnt,
            'file_name': file_name,
            'height': img.shape[0],
            'width': img.shape[1],
        })

        annotations.append({
            'keypoints': keypoints.reshape(-1).tolist(),
            'image_id': cnt,
            'id': cnt,
            'num_keypoints': len(keypoints),
            'bbox': bbox,
            'iscrowd': 0,
            'area': int(bbox[2] * bbox[3]),
            'category_id': 1,
        })

    now = time.localtime()
    cocotype = {
        'info': {
            'description': 'LaPa Generated by MMPose Team',
            'version': 1.0,
            'year': time.strftime('%Y', now),
            'date_created': time.strftime('%Y/%m/%d', now),
        },
        'images': images,
        'annotations': annotations,
        'categories': [{
            'supercategory': 'person',
            'id': 1,
            'name': 'face',
            'keypoints': [],
            'skeleton': []
        }],
    }

    # ``ensure_ascii=False`` may emit non-ASCII characters, so fix the
    # encoding explicitly; ``with`` guarantees the file is flushed/closed.
    with open(out_file, 'w', encoding='utf-8') as f:
        json.dump(cocotype, f, ensure_ascii=False, default=default_dump)
    print(f'done {out_file}')


if __name__ == '__main__':
    os.makedirs('data/LaPa/annotations', exist_ok=True)
    for tv in ['val', 'test', 'train']:
        print(f'processing {tv}')
        convert_labpa_to_coco(f'data/LaPa/{tv}',
                              f'data/LaPa/annotations/lapa_{tv}.json')