diff --git a/.gitignore b/.gitignore
index df5968c48..163aefc4b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -172,5 +172,4 @@ annotator/downloads/
 
 # test results and expectations
 web_tests/results/
-web_tests/expectations/
-*_diff.png
\ No newline at end of file
+web_tests/expectations/
\ No newline at end of file
diff --git a/annotator/openpose/__init__.py b/annotator/openpose/__init__.py
index 9f2fd77a0..6602dbf8f 100644
--- a/annotator/openpose/__init__.py
+++ b/annotator/openpose/__init__.py
@@ -17,18 +17,23 @@
 from .body import Body, BodyResult, Keypoint
 from .hand import Hand
 from .face import Face
-from .wholebody import Wholebody # DW Pose
-from .types import PoseResult, HandResult, FaceResult
 
 from modules import devices
 from annotator.annotator_path import models_path
 
-from typing import Tuple, List, Callable, Union, Optional
+from typing import NamedTuple, Tuple, List, Callable, Union, Optional
 
 body_model_path = "https://huggingface.co/lllyasviel/Annotators/resolve/main/body_pose_model.pth"
 hand_model_path = "https://huggingface.co/lllyasviel/Annotators/resolve/main/hand_pose_model.pth"
 face_model_path = "https://huggingface.co/lllyasviel/Annotators/resolve/main/facenet.pth"
-remote_dw_model_path = "https://huggingface.co/camenduru/DWPose/resolve/main/dw-ll_ucoco_384.pth"
+HandResult = List[Keypoint]
+FaceResult = List[Keypoint]
+
+class PoseResult(NamedTuple):
+    body: BodyResult
+    left_hand: Union[HandResult, None]
+    right_hand: Union[HandResult, None]
+    face: Union[FaceResult, None]
 
 def draw_poses(poses: List[PoseResult], H, W, draw_body=True, draw_hand=True, draw_face=True):
     """
@@ -174,8 +179,6 @@ def __init__(self):
         self.hand_estimation = None
         self.face_estimation = None
 
-        self.dw_pose_estimation = None
-
     def load_model(self):
         """
         Load the Openpose body, hand, and face models.
@@ -195,17 +198,10 @@ def load_model(self):
         if not os.path.exists(face_modelpath):
             from basicsr.utils.download_util import load_file_from_url
             load_file_from_url(face_model_path, model_dir=self.model_dir)
-            
+
         self.body_estimation = Body(body_modelpath)
         self.hand_estimation = Hand(hand_modelpath)
         self.face_estimation = Face(face_modelpath)
-
-    def load_dw_model(self):
-        dw_modelpath = os.path.join(self.model_dir, "dw-ll_ucoco_384.pth")
-        if not os.path.exists(dw_modelpath):
-            from basicsr.utils.download_util import load_file_from_url
-            load_file_from_url(remote_dw_model_path, model_dir=self.model_dir)
-        self.dw_pose_estimation = Wholebody(dw_modelpath, device=self.device)
 
     def unload_model(self):
         """
@@ -215,11 +211,6 @@ def unload_model(self):
         self.body_estimation.model.to("cpu")
         self.hand_estimation.model.to("cpu")
         self.face_estimation.model.to("cpu")
-
-    def unload_dw_model(self):
-        if self.dw_pose_estimation is not None:
-            self.dw_pose_estimation.detector.to("cpu")
-            self.dw_pose_estimation.pose_estimator.to("cpu")
 
     def detect_hands(self, body: BodyResult, oriImg) -> Tuple[Union[HandResult, None], Union[HandResult, None]]:
         left_hand = None
@@ -278,7 +269,7 @@ def detect_poses(self, oriImg, include_hand=False, include_face=False) -> List[P
 
         self.body_estimation.model.to(self.device)
         self.hand_estimation.model.to(self.device)
-        self.face_estimation.model.to(self.device)        
+        self.face_estimation.model.to(self.device)
 
         self.body_estimation.cn_device = self.device
         self.hand_estimation.cn_device = self.device
@@ -311,31 +302,10 @@ def detect_poses(self, oriImg, include_hand=False, include_face=False) -> List[P
             ), left_hand, right_hand, face))
 
         return results
-    
-    def detect_poses_dw(self, oriImg) -> List[PoseResult]:
-        """
-        Detect poses in the given image using DW Pose:
-        https://github.com/IDEA-Research/DWPose
-
-        Args:
-            oriImg (numpy.ndarray): The input image for pose detection.
-
-        Returns:
-            List[PoseResult]: A list of PoseResult objects containing the detected poses.
-        """
-        if self.dw_pose_estimation is None:
-            self.load_dw_model()
-
-        self.dw_pose_estimation.detector.to(self.device)
-        self.dw_pose_estimation.pose_estimator.to(self.device)
-
-        with torch.no_grad():
-            keypoints_info = self.dw_pose_estimation(oriImg.copy())
-        return Wholebody.format_result(keypoints_info)
-        
+
     def __call__(
-        self, oriImg, include_body=True, include_hand=False, include_face=False,
-        use_dw_pose=False, json_pose_callback: Callable[[str], None] = None,
+        self, oriImg, include_body=True, include_hand=False, include_face=False,
+        json_pose_callback: Callable[[str], None] = None,
     ):
         """
         Detect and draw poses in the given image.
@@ -345,19 +315,14 @@ def __call__(
             include_body (bool, optional): Whether to include body keypoints. Defaults to True.
             include_hand (bool, optional): Whether to include hand keypoints. Defaults to False.
             include_face (bool, optional): Whether to include face keypoints. Defaults to False.
-            use_dw_pose (bool, optional): Whether to use DW pose detection algorithm. Defaults to False.
             json_pose_callback (Callable, optional): A callback that accepts the pose JSON string.
 
         Returns:
             numpy.ndarray: The image with detected and drawn poses.
""" H, W, _ = oriImg.shape - - if use_dw_pose: - poses = self.detect_poses_dw(oriImg) - else: - poses = self.detect_poses(oriImg, include_hand, include_face) - + poses = self.detect_poses(oriImg, include_hand, include_face) if json_pose_callback: json_pose_callback(encode_poses_as_json(poses, H, W)) - return draw_poses(poses, H, W, draw_body=include_body, draw_hand=include_hand, draw_face=include_face) + return draw_poses(poses, H, W, draw_body=include_body, draw_hand=include_hand, draw_face=include_face) + \ No newline at end of file diff --git a/annotator/openpose/body.py b/annotator/openpose/body.py index 32934f19e..168dde3c9 100644 --- a/annotator/openpose/body.py +++ b/annotator/openpose/body.py @@ -11,7 +11,24 @@ from . import util from .model import bodypose_model -from .types import Keypoint, BodyResult + +class Keypoint(NamedTuple): + x: float + y: float + score: float = 1.0 + id: int = -1 + + +class BodyResult(NamedTuple): + # Note: Using `Union` instead of `|` operator as the ladder is a Python + # 3.10 feature. + # Annotator code should be Python 3.8 Compatible, as controlnet repo uses + # Python 3.8 environment. + # https://github.com/lllyasviel/ControlNet/blob/d3284fcd0972c510635a4f5abe2eeb71dc0de524/environment.yaml#L6 + keypoints: List[Union[Keypoint, None]] + total_score: float = 0.0 + total_parts: int = 0 + class Body(object): def __init__(self, model_path): diff --git a/annotator/openpose/dwpose_config/dwpose-l_384x288.py b/annotator/openpose/dwpose_config/dwpose-l_384x288.py deleted file mode 100644 index 8124a1360..000000000 --- a/annotator/openpose/dwpose_config/dwpose-l_384x288.py +++ /dev/null @@ -1,251 +0,0 @@ -# runtime -max_epochs = 270 -stage2_num_epochs = 30 -base_lr = 4e-3 - -train_cfg = dict(max_epochs=max_epochs, val_interval=10) -randomness = dict(seed=21) - -# optimizer -optim_wrapper = dict( - type='OptimWrapper', - optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), - paramwise_cfg=dict( - norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) - -# learning rate -param_scheduler = [ - dict( - type='LinearLR', - start_factor=1.0e-5, - by_epoch=False, - begin=0, - end=1000), - dict( - # use cosine lr from 150 to 300 epoch - type='CosineAnnealingLR', - eta_min=base_lr * 0.05, - begin=max_epochs // 2, - end=max_epochs, - T_max=max_epochs // 2, - by_epoch=True, - convert_to_iter_based=True), -] - -# automatically scaling LR based on the actual training batch size -auto_scale_lr = dict(base_batch_size=512) - -# codec settings -codec = dict( - type='SimCCLabel', - input_size=(288, 384), - sigma=(6., 6.93), - simcc_split_ratio=2.0, - normalize=False, - use_dark=False) - -# model settings -model = dict( - type='TopdownPoseEstimator', - data_preprocessor=dict( - type='PoseDataPreprocessor', - mean=[123.675, 116.28, 103.53], - std=[58.395, 57.12, 57.375], - bgr_to_rgb=True), - backbone=dict( - _scope_='mmdet', - type='CSPNeXt', - arch='P5', - expand_ratio=0.5, - deepen_factor=1., - widen_factor=1., - out_indices=(4, ), - channel_attention=True, - norm_cfg=dict(type='SyncBN'), - act_cfg=dict(type='SiLU'), - init_cfg=dict( - type='Pretrained', - prefix='backbone.', - checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' - 'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth' # noqa - )), - head=dict( - type='RTMCCHead', - in_channels=1024, - out_channels=133, - input_size=codec['input_size'], - in_featuremap_size=(9, 12), - simcc_split_ratio=codec['simcc_split_ratio'], - final_layer_kernel_size=7, - gau_cfg=dict( - 
-            hidden_dims=256,
-            s=128,
-            expansion_factor=2,
-            dropout_rate=0.,
-            drop_path=0.,
-            act_fn='SiLU',
-            use_rel_bias=False,
-            pos_enc=False),
-        loss=dict(
-            type='KLDiscretLoss',
-            use_target_weight=True,
-            beta=10.,
-            label_softmax=True),
-        decoder=codec),
-    test_cfg=dict(flip_test=True, ))
-
-# base dataset settings
-dataset_type = 'CocoWholeBodyDataset'
-data_mode = 'topdown'
-data_root = '/data/'
-
-backend_args = dict(backend='local')
-
-# pipelines
-train_pipeline = [
-    dict(type='LoadImage', backend_args=backend_args),
-    dict(type='GetBBoxCenterScale'),
-    dict(type='RandomFlip', direction='horizontal'),
-    dict(type='RandomHalfBody'),
-    dict(
-        type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
-    dict(type='TopdownAffine', input_size=codec['input_size']),
-    dict(type='mmdet.YOLOXHSVRandomAug'),
-    dict(
-        type='Albumentation',
-        transforms=[
-            dict(type='Blur', p=0.1),
-            dict(type='MedianBlur', p=0.1),
-            dict(
-                type='CoarseDropout',
-                max_holes=1,
-                max_height=0.4,
-                max_width=0.4,
-                min_holes=1,
-                min_height=0.2,
-                min_width=0.2,
-                p=1.0),
-        ]),
-    dict(type='GenerateTarget', encoder=codec),
-    dict(type='PackPoseInputs')
-]
-val_pipeline = [
-    dict(type='LoadImage', backend_args=backend_args),
-    dict(type='GetBBoxCenterScale'),
-    dict(type='TopdownAffine', input_size=codec['input_size']),
-    dict(type='PackPoseInputs')
-]
-
-train_pipeline_stage2 = [
-    dict(type='LoadImage', backend_args=backend_args),
-    dict(type='GetBBoxCenterScale'),
-    dict(type='RandomFlip', direction='horizontal'),
-    dict(type='RandomHalfBody'),
-    dict(
-        type='RandomBBoxTransform',
-        shift_factor=0.,
-        scale_factor=[0.75, 1.25],
-        rotate_factor=60),
-    dict(type='TopdownAffine', input_size=codec['input_size']),
-    dict(type='mmdet.YOLOXHSVRandomAug'),
-    dict(
-        type='Albumentation',
-        transforms=[
-            dict(type='Blur', p=0.1),
-            dict(type='MedianBlur', p=0.1),
-            dict(
-                type='CoarseDropout',
-                max_holes=1,
-                max_height=0.4,
-                max_width=0.4,
-                min_holes=1,
-                min_height=0.2,
-                min_width=0.2,
-                p=0.5),
-        ]),
-    dict(type='GenerateTarget', encoder=codec),
-    dict(type='PackPoseInputs')
-]
-
-datasets = []
-dataset_coco=dict(
-    type=dataset_type,
-    data_root=data_root,
-    data_mode=data_mode,
-    ann_file='coco/annotations/coco_wholebody_train_v1.0.json',
-    data_prefix=dict(img='coco/train2017/'),
-    pipeline=[],
-)
-datasets.append(dataset_coco)
-
-scene = ['Magic_show', 'Entertainment', 'ConductMusic', 'Online_class',
-         'TalkShow', 'Speech', 'Fitness', 'Interview', 'Olympic', 'TVShow',
-         'Singing', 'SignLanguage', 'Movie', 'LiveVlog', 'VideoConference']
-
-for i in range(len(scene)):
-    datasets.append(
-        dict(
-            type=dataset_type,
-            data_root=data_root,
-            data_mode=data_mode,
-            ann_file='UBody/annotations/'+scene[i]+'/keypoint_annotation.json',
-            data_prefix=dict(img='UBody/images/'+scene[i]+'/'),
-            pipeline=[],
-        )
-    )
-
-# data loaders
-train_dataloader = dict(
-    batch_size=32,
-    num_workers=10,
-    persistent_workers=True,
-    sampler=dict(type='DefaultSampler', shuffle=True),
-    dataset=dict(
-        type='CombinedDataset',
-        metainfo=dict(from_file='configs/_base_/datasets/coco_wholebody.py'),
-        datasets=datasets,
-        pipeline=train_pipeline,
-        test_mode=False,
-    ))
-val_dataloader = dict(
-    batch_size=32,
-    num_workers=10,
-    persistent_workers=True,
-    drop_last=False,
-    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
-    dataset=dict(
-        type=dataset_type,
-        data_root=data_root,
-        data_mode=data_mode,
-        ann_file='coco/annotations/coco_wholebody_val_v1.0.json',
-        bbox_file=f'{data_root}coco/person_detection_results/'
-        'COCO_val2017_detections_AP_H_56_person.json',
-        data_prefix=dict(img='coco/val2017/'),
-        test_mode=True,
-        pipeline=val_pipeline,
-    ))
-test_dataloader = val_dataloader
-
-# hooks
-default_hooks = dict(
-    checkpoint=dict(
-        save_best='coco-wholebody/AP', rule='greater', max_keep_ckpts=1))
-
-custom_hooks = [
-    dict(
-        type='EMAHook',
-        ema_type='ExpMomentumEMA',
-        momentum=0.0002,
-        update_buffers=True,
-        priority=49),
-    dict(
-        type='mmdet.PipelineSwitchHook',
-        switch_epoch=max_epochs - stage2_num_epochs,
-        switch_pipeline=train_pipeline_stage2)
-]
-
-# evaluators
-val_evaluator = dict(
-    type='CocoWholeBodyMetric',
-    ann_file=data_root + 'coco/annotations/coco_wholebody_val_v1.0.json')
-test_evaluator = val_evaluator
diff --git a/annotator/openpose/types.py b/annotator/openpose/types.py
deleted file mode 100644
index e521e65dc..000000000
--- a/annotator/openpose/types.py
+++ /dev/null
@@ -1,29 +0,0 @@
-from typing import NamedTuple, List, Optional
-
-class Keypoint(NamedTuple):
-    x: float
-    y: float
-    score: float = 1.0
-    id: int = -1
-
-
-class BodyResult(NamedTuple):
-    # Note: Using `Optional` instead of `|` operator as the ladder is a Python
-    # 3.10 feature.
-    # Annotator code should be Python 3.8 Compatible, as controlnet repo uses
-    # Python 3.8 environment.
-    # https://github.com/lllyasviel/ControlNet/blob/d3284fcd0972c510635a4f5abe2eeb71dc0de524/environment.yaml#L6
-    keypoints: List[Optional[Keypoint]]
-    total_score: float = 0.0
-    total_parts: int = 0
-
-
-HandResult = List[Keypoint]
-FaceResult = List[Keypoint]
-
-
-class PoseResult(NamedTuple):
-    body: BodyResult
-    left_hand: Optional[HandResult]
-    right_hand: Optional[HandResult]
-    face: Optional[FaceResult]
diff --git a/annotator/openpose/util.py b/annotator/openpose/util.py
index facfde9e1..9175b8e47 100644
--- a/annotator/openpose/util.py
+++ b/annotator/openpose/util.py
@@ -2,7 +2,7 @@
 import numpy as np
 import matplotlib
 import cv2
-from typing import List, Tuple, Union, Optional
+from typing import List, Tuple, Union
 
 from .body import BodyResult, Keypoint
 
@@ -67,9 +67,6 @@ def transfer(model, model_weights):
     return transfered_model_weights
 
-def is_normalized(keypoints: List[Optional[Keypoint]]) -> bool:
-    return all(0 <= abs(k.x) <= 1 and 0 <= abs(k.y) <= 1 for k in keypoints if k is not None)
-
 
 def draw_bodypose(canvas: np.ndarray, keypoints: List[Keypoint]) -> np.ndarray:
     """
     Draw keypoints and limbs representing body pose on a given canvas.
@@ -84,11 +81,7 @@ def draw_bodypose(canvas: np.ndarray, keypoints: List[Keypoint]) -> np.ndarray:
     Note:
         The function expects the x and y coordinates of the keypoints to be normalized between 0 and 1.
""" - if not is_normalized(keypoints): - H, W = 1.0, 1.0 - else: - H, W, _ = canvas.shape - + H, W, C = canvas.shape stickwidth = 4 limbSeq = [ @@ -149,10 +142,7 @@ def draw_handpose(canvas: np.ndarray, keypoints: Union[List[Keypoint], None]) -> if not keypoints: return canvas - if not is_normalized(keypoints): - H, W = 1.0, 1.0 - else: - H, W, _ = canvas.shape + H, W, C = canvas.shape edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10], \ [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]] @@ -200,11 +190,7 @@ def draw_facepose(canvas: np.ndarray, keypoints: Union[List[Keypoint], None]) -> if not keypoints: return canvas - if not is_normalized(keypoints): - H, W = 1.0, 1.0 - else: - H, W, _ = canvas.shape - + H, W, C = canvas.shape for keypoint in keypoints: if keypoint is None: continue diff --git a/annotator/openpose/wholebody.py b/annotator/openpose/wholebody.py deleted file mode 100644 index 7f2640596..000000000 --- a/annotator/openpose/wholebody.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import numpy as np -from mmpose.apis import inference_topdown -from mmpose.apis import init_model as init_pose_estimator -from mmpose.evaluation.functional import nms -from mmpose.utils import adapt_mmdet_pipeline -from mmpose.structures import merge_data_samples - -from mmdet.apis import inference_detector, init_detector - -import os -from typing import List, Optional -from .types import PoseResult, BodyResult, Keypoint - -def get_current_file_directory(): - return os.path.dirname(os.path.realpath(__file__)) - -class Wholebody: - def __init__(self, dw_modelpath: str, device: str): - directory = get_current_file_directory() - - det_config = f"{directory}/yolox_config/yolox_l_8xb8-300e_coco.py" - det_ckpt = "https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth" - pose_config = f"{directory}/dwpose_config/dwpose-l_384x288.py" - pose_ckpt = dw_modelpath - - # build detector - self.detector = init_detector(det_config, det_ckpt, device=device) - self.detector.cfg = adapt_mmdet_pipeline(self.detector.cfg) - - # build pose estimator - self.pose_estimator = init_pose_estimator(pose_config, pose_ckpt, device=device) - - def __call__(self, oriImg): - # predict bbox - det_result = inference_detector(self.detector, oriImg) - pred_instance = det_result.pred_instances.cpu().numpy() - bboxes = np.concatenate( - (pred_instance.bboxes, pred_instance.scores[:, None]), axis=1 - ) - bboxes = bboxes[ - np.logical_and(pred_instance.labels == 0, pred_instance.scores > 0.3) - ] - - bboxes = bboxes[nms(bboxes, 0.3), :4] - - # predict keypoints - if len(bboxes) == 0: - pose_results = inference_topdown(self.pose_estimator, oriImg) - else: - pose_results = inference_topdown(self.pose_estimator, oriImg, bboxes) - preds = merge_data_samples(pose_results) - preds = preds.pred_instances - - keypoints = preds.get("transformed_keypoints", preds.keypoints) - if "keypoint_scores" in preds: - scores = preds.keypoint_scores - else: - scores = np.ones(keypoints.shape[:-1]) - - if "keypoints_visible" in preds: - visible = preds.keypoints_visible - else: - visible = np.ones(keypoints.shape[:-1]) - keypoints_info = np.concatenate( - (keypoints, scores[..., None], visible[..., None]), axis=-1 - ) - # compute neck joint - neck = np.mean(keypoints_info[:, [5, 6]], axis=1) - # neck score when visualizing pred - neck[:, 2:4] = np.logical_and( - 
-            keypoints_info[:, 5, 2:4] > 0.3, keypoints_info[:, 6, 2:4] > 0.3
-        ).astype(int)
-        new_keypoints_info = np.insert(keypoints_info, 17, neck, axis=1)
-        mmpose_idx = [17, 6, 8, 10, 7, 9, 12, 14, 16, 13, 15, 2, 1, 4, 3]
-        openpose_idx = [1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17]
-        new_keypoints_info[:, openpose_idx] = new_keypoints_info[:, mmpose_idx]
-        keypoints_info = new_keypoints_info
-
-        # [person_id, keypoint_id, [x, y, score]]
-        return keypoints_info[..., :3]
-
-    @staticmethod
-    def format_result(keypoints_info: np.ndarray) -> List[PoseResult]:
-        def format_keypoint_part(part: np.ndarray) -> Optional[List[Keypoint]]:
-            keypoints = [
-                Keypoint(x, y, score, i) if score >= 0.3 else None
-                for i, (x, y, score) in enumerate(part)
-            ]
-            return None if all(keypoint is None for keypoint in keypoints) else keypoints
-
-        def total_score(keypoints: List[Keypoint]) -> float:
-            return sum(keypoint.score for keypoint in keypoints if keypoint is not None)
-
-        pose_results = []
-
-        for instance in keypoints_info:
-            body_keypoints = format_keypoint_part(instance[:18])
-            left_hand = format_keypoint_part(instance[92:113])
-            right_hand = format_keypoint_part(instance[113:134])
-            face = format_keypoint_part(instance[24:92])
-
-            body = BodyResult(body_keypoints, total_score(body_keypoints), len(body_keypoints))
-            pose_results.append(PoseResult(body, left_hand, right_hand, face))
-
-        return pose_results
-
diff --git a/annotator/openpose/yolox_config/yolox_l_8xb8-300e_coco.py b/annotator/openpose/yolox_config/yolox_l_8xb8-300e_coco.py
deleted file mode 100644
index 7b4cb5a4b..000000000
--- a/annotator/openpose/yolox_config/yolox_l_8xb8-300e_coco.py
+++ /dev/null
@@ -1,245 +0,0 @@
-img_scale = (640, 640)  # width, height
-
-# model settings
-model = dict(
-    type='YOLOX',
-    data_preprocessor=dict(
-        type='DetDataPreprocessor',
-        pad_size_divisor=32,
-        batch_augments=[
-            dict(
-                type='BatchSyncRandomResize',
-                random_size_range=(480, 800),
-                size_divisor=32,
-                interval=10)
-        ]),
-    backbone=dict(
-        type='CSPDarknet',
-        deepen_factor=1.0,
-        widen_factor=1.0,
-        out_indices=(2, 3, 4),
-        use_depthwise=False,
-        spp_kernal_sizes=(5, 9, 13),
-        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
-        act_cfg=dict(type='Swish'),
-    ),
-    neck=dict(
-        type='YOLOXPAFPN',
-        in_channels=[256, 512, 1024],
-        out_channels=256,
-        num_csp_blocks=3,
-        use_depthwise=False,
-        upsample_cfg=dict(scale_factor=2, mode='nearest'),
-        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
-        act_cfg=dict(type='Swish')),
-    bbox_head=dict(
-        type='YOLOXHead',
-        num_classes=80,
-        in_channels=256,
-        feat_channels=256,
-        stacked_convs=2,
-        strides=(8, 16, 32),
-        use_depthwise=False,
-        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
-        act_cfg=dict(type='Swish'),
-        loss_cls=dict(
-            type='CrossEntropyLoss',
-            use_sigmoid=True,
-            reduction='sum',
-            loss_weight=1.0),
-        loss_bbox=dict(
-            type='IoULoss',
-            mode='square',
-            eps=1e-16,
-            reduction='sum',
-            loss_weight=5.0),
-        loss_obj=dict(
-            type='CrossEntropyLoss',
-            use_sigmoid=True,
-            reduction='sum',
-            loss_weight=1.0),
-        loss_l1=dict(type='L1Loss', reduction='sum', loss_weight=1.0)),
-    train_cfg=dict(assigner=dict(type='SimOTAAssigner', center_radius=2.5)),
-    # In order to align the source code, the threshold of the val phase is
-    # 0.01, and the threshold of the test phase is 0.001.
-    test_cfg=dict(score_thr=0.01, nms=dict(type='nms', iou_threshold=0.65)))
-
-# dataset settings
-data_root = 'data/coco/'
-dataset_type = 'CocoDataset'
-
-# Example to use different file client
-# Method 1: simply set the data root and let the file I/O module
-# automatically infer from prefix (not support LMDB and Memcache yet)
-
-# data_root = 's3://openmmlab/datasets/detection/coco/'
-
-# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
-# backend_args = dict(
-#     backend='petrel',
-#     path_mapping=dict({
-#         './data/': 's3://openmmlab/datasets/detection/',
-#         'data/': 's3://openmmlab/datasets/detection/'
-#     }))
-backend_args = None
-
-train_pipeline = [
-    dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),
-    dict(
-        type='RandomAffine',
-        scaling_ratio_range=(0.1, 2),
-        # img_scale is (width, height)
-        border=(-img_scale[0] // 2, -img_scale[1] // 2)),
-    dict(
-        type='MixUp',
-        img_scale=img_scale,
-        ratio_range=(0.8, 1.6),
-        pad_val=114.0),
-    dict(type='YOLOXHSVRandomAug'),
-    dict(type='RandomFlip', prob=0.5),
-    # According to the official implementation, multi-scale
-    # training is not considered here but in the
-    # 'mmdet/models/detectors/yolox.py'.
-    # Resize and Pad are for the last 15 epochs when Mosaic,
-    # RandomAffine, and MixUp are closed by YOLOXModeSwitchHook.
-    dict(type='Resize', scale=img_scale, keep_ratio=True),
-    dict(
-        type='Pad',
-        pad_to_square=True,
-        # If the image is three-channel, the pad value needs
-        # to be set separately for each channel.
-        pad_val=dict(img=(114.0, 114.0, 114.0))),
-    dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1), keep_empty=False),
-    dict(type='PackDetInputs')
-]
-
-train_dataset = dict(
-    # use MultiImageMixDataset wrapper to support mosaic and mixup
-    type='MultiImageMixDataset',
-    dataset=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file='annotations/instances_train2017.json',
-        data_prefix=dict(img='train2017/'),
-        pipeline=[
-            dict(type='LoadImageFromFile', backend_args=backend_args),
-            dict(type='LoadAnnotations', with_bbox=True)
-        ],
-        filter_cfg=dict(filter_empty_gt=False, min_size=32),
-        backend_args=backend_args),
-    pipeline=train_pipeline)
-
-test_pipeline = [
-    dict(type='LoadImageFromFile', backend_args=backend_args),
-    dict(type='Resize', scale=img_scale, keep_ratio=True),
-    dict(
-        type='Pad',
-        pad_to_square=True,
-        pad_val=dict(img=(114.0, 114.0, 114.0))),
-    dict(type='LoadAnnotations', with_bbox=True),
-    dict(
-        type='PackDetInputs',
-        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
-                   'scale_factor'))
-]
-
-train_dataloader = dict(
-    batch_size=8,
-    num_workers=4,
-    persistent_workers=True,
-    sampler=dict(type='DefaultSampler', shuffle=True),
-    dataset=train_dataset)
-val_dataloader = dict(
-    batch_size=8,
-    num_workers=4,
-    persistent_workers=True,
-    drop_last=False,
-    sampler=dict(type='DefaultSampler', shuffle=False),
-    dataset=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file='annotations/instances_val2017.json',
-        data_prefix=dict(img='val2017/'),
-        test_mode=True,
-        pipeline=test_pipeline,
-        backend_args=backend_args))
-test_dataloader = val_dataloader
-
-val_evaluator = dict(
-    type='CocoMetric',
-    ann_file=data_root + 'annotations/instances_val2017.json',
-    metric='bbox',
-    backend_args=backend_args)
-test_evaluator = val_evaluator
-
-# training settings
-max_epochs = 300
-num_last_epochs = 15
-interval = 10
-
-train_cfg = dict(max_epochs=max_epochs, val_interval=interval)
-
-# optimizer
-# default 8 gpu
-base_lr = 0.01
-optim_wrapper = dict(
-    type='OptimWrapper',
-    optimizer=dict(
-        type='SGD', lr=base_lr, momentum=0.9, weight_decay=5e-4,
-        nesterov=True),
-    paramwise_cfg=dict(norm_decay_mult=0., bias_decay_mult=0.))
-
-# learning rate
-param_scheduler = [
-    dict(
-        # use quadratic formula to warm up 5 epochs
-        # and lr is updated by iteration
-        # TODO: fix default scope in get function
-        type='mmdet.QuadraticWarmupLR',
-        by_epoch=True,
-        begin=0,
-        end=5,
-        convert_to_iter_based=True),
-    dict(
-        # use cosine lr from 5 to 285 epoch
-        type='CosineAnnealingLR',
-        eta_min=base_lr * 0.05,
-        begin=5,
-        T_max=max_epochs - num_last_epochs,
-        end=max_epochs - num_last_epochs,
-        by_epoch=True,
-        convert_to_iter_based=True),
-    dict(
-        # use fixed lr during last 15 epochs
-        type='ConstantLR',
-        by_epoch=True,
-        factor=1,
-        begin=max_epochs - num_last_epochs,
-        end=max_epochs,
-    )
-]
-
-default_hooks = dict(
-    checkpoint=dict(
-        interval=interval,
-        max_keep_ckpts=3  # only keep latest 3 checkpoints
-    ))
-
-custom_hooks = [
-    dict(
-        type='YOLOXModeSwitchHook',
-        num_last_epochs=num_last_epochs,
-        priority=48),
-    dict(type='SyncNormHook', priority=48),
-    dict(
-        type='EMAHook',
-        ema_type='ExpMomentumEMA',
-        momentum=0.0001,
-        update_buffers=True,
-        priority=49)
-]
-
-# NOTE: `auto_scale_lr` is for automatically scaling LR,
-# USER SHOULD NOT CHANGE ITS VALUES.
-# base_batch_size = (8 GPUs) x (8 samples per GPU)
-auto_scale_lr = dict(base_batch_size=64)
diff --git a/install.py b/install.py
index 04cc607f1..b3c25183c 100644
--- a/install.py
+++ b/install.py
@@ -1,7 +1,6 @@
 import launch
 import os
 import pkg_resources
-import subprocess
 
 req_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), "requirements.txt")
 
@@ -18,20 +17,4 @@
             launch.run_pip(f"install {package}", f"sd-webui-controlnet requirement: {package}")
         except Exception as e:
             print(e)
-            print(f'Warning: Failed to install {package}, some preprocessors may not work.')
-
-
-# DW Pose dependencies.
-mim_packages = [
-    "mmengine",
-    "mmcv>=2.0.1",
-    "mmdet>=3.1.0",
-    "mmpose>=1.1.0",
-]
-for package in mim_packages:
-    try:
-        package_name = package.split('>=')[0]
-        if not launch.is_installed(package_name):
-            subprocess.call(["mim", "install", package])
-    except Exception as e:
-        print(f'Warning: Failed to install {package}. {e}')
+            print(f'Warning: Failed to install {package}, some preprocessors may not work.')
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 71446cd4d..573936d8c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,3 @@
 mediapipe
 svglib
 fvcore
-openmim
\ No newline at end of file
diff --git a/scripts/global_state.py b/scripts/global_state.py
index 98afbbb61..ca17bfbaf 100644
--- a/scripts/global_state.py
+++ b/scripts/global_state.py
@@ -62,7 +62,6 @@ def unified_preprocessor(preprocessor_name: str, *args, **kwargs):
     "openpose_face": functools.partial(g_openpose_model.run_model, include_body=True, include_hand=False, include_face=True),
     "openpose_faceonly": functools.partial(g_openpose_model.run_model, include_body=False, include_hand=False, include_face=True),
     "openpose_full": functools.partial(g_openpose_model.run_model, include_body=True, include_hand=True, include_face=True),
-    "dw_openpose_full": functools.partial(g_openpose_model.run_model, include_body=True, include_hand=True, include_face=True, use_dw_pose=True),
     "clip_vision": clip,
     "color": color,
     "pidinet": pidinet,
@@ -108,7 +107,6 @@ def unified_preprocessor(preprocessor_name: str, *args, **kwargs):
     "openpose_hand": g_openpose_model.unload,
     "openpose_face": g_openpose_model.unload,
     "openpose_full": g_openpose_model.unload,
-    "dw_openpose_full": g_openpose_model.unload,
     "segmentation": unload_uniformer,
     "depth_zoe": unload_zoe_depth,
     "normal_bae": unload_normal_bae,
diff --git a/scripts/processor.py b/scripts/processor.py
index 5b68759e1..12dae6bc5 100644
--- a/scripts/processor.py
+++ b/scripts/processor.py
@@ -4,18 +4,6 @@
 from annotator.util import HWC3
 from typing import Callable, Tuple
 
-from modules import safe
-import mmengine
-
-
-def torch_extra_handler(module, name):
-    """ Register HistoryBuffer to whitelist, so that A1111 does not complain.
-    This is for mmengine used by DW Pose detector.
- """ - if module == 'mmengine.logging.history_buffer' and name in ['HistoryBuffer']: - return mmengine.logging.history_buffer.HistoryBuffer - return None - def pad64(x): return int(np.ceil(float(x) / 64.0) * 64 - x) @@ -241,7 +229,6 @@ def run_model( include_body: bool, include_hand: bool, include_face: bool, - use_dw_pose: bool = False, json_pose_callback: Callable[[str], None] = None, res: int = 512, **kwargs # Ignore rest of kwargs @@ -257,25 +244,22 @@ def run_model( img, remove_pad = resize_image_with_pad(img, res) - with safe.Extra(torch_extra_handler): - if self.model_openpose is None: - from annotator.openpose import OpenposeDetector - self.model_openpose = OpenposeDetector() + if self.model_openpose is None: + from annotator.openpose import OpenposeDetector + self.model_openpose = OpenposeDetector() - return remove_pad(self.model_openpose( - img, - include_body=include_body, - include_hand=include_hand, - include_face=include_face, - use_dw_pose=use_dw_pose, - json_pose_callback=json_pose_callback - )), True + return remove_pad(self.model_openpose( + img, + include_body=include_body, + include_hand=include_hand, + include_face=include_face, + json_pose_callback=json_pose_callback + )), True def unload(self): - with safe.Extra(torch_extra_handler): - if self.model_openpose is not None: - self.model_openpose.unload_model() - self.model_openpose.unload_dw_model() + if self.model_openpose is not None: + self.model_openpose.unload_model() + g_openpose_model = OpenposeModel() @@ -704,14 +688,6 @@ def shuffle(img, res=512, **kwargs): "value": 512 } ], - "dw_openpose_full": [ - { - "name": flag_preprocessor_resolution, - "min": 64, - "max": 2048, - "value": 512 - } - ], "segmentation": [ { "name": flag_preprocessor_resolution, diff --git a/tests/annotator_tests/openpose_tests/openpose_e2e_test.py b/tests/annotator_tests/openpose_tests/openpose_e2e_test.py index ad6559189..b6e85ac6d 100644 --- a/tests/annotator_tests/openpose_tests/openpose_e2e_test.py +++ b/tests/annotator_tests/openpose_tests/openpose_e2e_test.py @@ -90,29 +90,6 @@ def test_all(self): ), overwrite_expectation=False ) - - def test_dw(self): - # Need following code to pass A1111 safety check. - import mmengine - from modules import safe - - def handler(module, name): - if module == 'mmengine.logging.history_buffer' and name in ['HistoryBuffer']: - return mmengine.logging.history_buffer.HistoryBuffer - return None - - with safe.Extra(handler): - self.template( - test_image = f'{TestOpenposeDetector.image_path}/woman.jpeg', - expected_image = f'{TestOpenposeDetector.image_path}/expected_woman_dw_all_output.png', - detector_config=dict( - include_body=True, - include_face=True, - include_hand=True, - use_dw_pose=True, - ), - overwrite_expectation=False, - ) if __name__ == '__main__': unittest.main() \ No newline at end of file diff --git a/tests/images/expected_woman_dw_all_output.png b/tests/images/expected_woman_dw_all_output.png deleted file mode 100644 index 50d502ca8..000000000 Binary files a/tests/images/expected_woman_dw_all_output.png and /dev/null differ