diff --git a/bodymocap/body_bbox_detector.py b/bodymocap/body_bbox_detector.py
index 0aac428..7b3c181 100644
--- a/bodymocap/body_bbox_detector.py
+++ b/bodymocap/body_bbox_detector.py
@@ -27,7 +27,8 @@ class BodyPoseEstimator(object):
     Hand Detector for third-view input.
     It combines a body pose estimator (https://github.com/jhugestar/lightweight-human-pose-estimation.pytorch.git)
    """
-    def __init__(self):
+    def __init__(self, use_cuda=True):
+        self.use_cuda = use_cuda
        print("Loading Body Pose Estimator")
        self.__load_body_estimator()
@@ -38,13 +39,14 @@ def __load_body_estimator(self):
        checkpoint = torch.load(pose2d_checkpoint, map_location='cpu')
        load_state(net, checkpoint)
        net = net.eval()
-        net = net.cuda()
+        if self.use_cuda:
+            net = net.cuda()
        self.model = net

    #Code from https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch/demo.py
    def __infer_fast(self, img, input_height_size, stride, upsample_ratio,
-                     cpu=False, pad_value=(0, 0, 0), img_mean=(128, 128, 128), img_scale=1/256):
+                     use_cuda=True, pad_value=(0, 0, 0), img_mean=(128, 128, 128), img_scale=1/256):
        height, width, _ = img.shape
        scale = input_height_size / height
@@ -54,7 +56,7 @@ def __infer_fast(self, img, input_height_size, stride, upsample_ratio,
        padded_img, pad = pad_width(scaled_img, stride, pad_value, min_dims)

        tensor_img = torch.from_numpy(padded_img).permute(2, 0, 1).unsqueeze(0).float()
-        if not cpu:
+        if use_cuda:
            tensor_img = tensor_img.cuda()

        stages_output = self.model(tensor_img)
@@ -79,8 +81,10 @@ def detect_body_pose(self, img):
        orig_img = img.copy()

        # forward
-        heatmaps, pafs, scale, pad = self.__infer_fast(img,
-            input_height_size=256, stride=stride, upsample_ratio=upsample_ratio)
+        heatmaps, pafs, scale, pad = self.__infer_fast(
+            img, input_height_size=256, stride=stride, upsample_ratio=upsample_ratio,
+            use_cuda=self.use_cuda
+        )

        total_keypoints_num = 0
        all_keypoints_by_type = []
diff --git a/bodymocap/body_mocap_api.py b/bodymocap/body_mocap_api.py
index e929d2e..253fe79 100644
--- a/bodymocap/body_mocap_api.py
+++ b/bodymocap/body_mocap_api.py
@@ -17,7 +17,7 @@ class BodyMocap(object):
    def __init__(self, regressor_checkpoint, smpl_dir, device=torch.device('cuda'), use_smplx=False):

-        self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+        self.device = device

        # Load parametric model (SMPLX or SMPL)
        if use_smplx:
@@ -36,7 +36,7 @@ def __init__(self, regressor_checkpoint, smpl_dir, device=torch.device('cuda'),
        #Load pre-trained neural network
        SMPL_MEAN_PARAMS = './extra_data/body_module/data_from_spin/smpl_mean_params.npz'
        self.model_regressor = hmr(SMPL_MEAN_PARAMS).to(self.device)
-        checkpoint = torch.load(regressor_checkpoint)
+        checkpoint = torch.load(regressor_checkpoint, map_location=device)
        self.model_regressor.load_state_dict(checkpoint['model'], strict=False)
        self.model_regressor.eval()
@@ -74,7 +74,7 @@ def regress(self, img_original, body_bbox_list):

            #Convert rot_mat to aa since hands are always in aa
            # pred_aa = rotmat3x3_to_angle_axis(pred_rotmat)
-            pred_aa = gu.rotation_matrix_to_angle_axis(pred_rotmat).cuda()
+            pred_aa = gu.rotation_matrix_to_angle_axis(pred_rotmat).to(self.device)
            pred_aa = pred_aa.reshape(pred_aa.shape[0], 72)
            smpl_output = self.smpl(
                betas=pred_betas,
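The `map_location` argument above is what lets a checkpoint saved from a GPU run be restored on a CPU-only machine. A minimal, self-contained illustration of the behavior — a toy state dict round-tripped through an in-memory buffer, not the project's checkpoints:

```python
import io
import torch

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# map_location remaps the saved storages onto whatever device is actually present.
buf = io.BytesIO()
torch.save({'w': torch.randn(3)}, buf)
buf.seek(0)
state = torch.load(buf, map_location=device)
print(state['w'].device)  # cpu on a CPU-only machine, cuda:0 otherwise
```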
diff --git a/demo/demo_bodymocap.py b/demo/demo_bodymocap.py
index 704b77f..0028a30 100644
--- a/demo/demo_bodymocap.py
+++ b/demo/demo_bodymocap.py
@@ -153,10 +153,10 @@ def main():
    args = DemoOptions().parse()

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
-    assert torch.cuda.is_available(), "Current version only supports GPU"
+    use_cuda = device.type == 'cuda'

    # Set bbox detector
-    body_bbox_detector = BodyPoseEstimator()
+    body_bbox_detector = BodyPoseEstimator(use_cuda=use_cuda)

    # Set mocap regressor
    use_smplx = args.use_smplx
diff --git a/demo/demo_frankmocap.py b/demo/demo_frankmocap.py
index e920261..42ad13b 100644
--- a/demo/demo_frankmocap.py
+++ b/demo/demo_frankmocap.py
@@ -45,7 +45,8 @@ def __filter_bbox_list(body_bbox_list, hand_bbox_list, single_person):
def run_regress(
    args, img_original_bgr,
    body_bbox_list, hand_bbox_list, bbox_detector,
-    body_mocap, hand_mocap
+    body_mocap, hand_mocap,
+    use_cuda=True
):
    cond1 = len(body_bbox_list) > 0 and len(hand_bbox_list) > 0
    cond2 = not args.frankmocap_fast_mode
@@ -102,12 +103,14 @@ def run_regress(

    # integration by copy-and-paste
    integral_output_list = integration_copy_paste(
-        pred_body_list, pred_hand_list, body_mocap.smpl, img_original_bgr.shape)
+        pred_body_list, pred_hand_list, body_mocap.smpl, img_original_bgr.shape,
+        use_cuda=use_cuda
+    )

    return body_bbox_list, hand_bbox_list, integral_output_list


-def run_frank_mocap(args, bbox_detector, body_mocap, hand_mocap, visualizer):
+def run_frank_mocap(args, bbox_detector, body_mocap, hand_mocap, visualizer, use_cuda=True):
    #Setup input data to handle different types of inputs
    input_type, input_data = demo_utils.setup_input(args)
@@ -176,7 +179,9 @@ def run_frank_mocap(args, bbox_detector, body_mocap, hand_mocap, visualizer):
            body_bbox_list, hand_bbox_list, pred_output_list = run_regress(
                args, img_original_bgr,
                body_bbox_list, hand_bbox_list, bbox_detector,
-                body_mocap, hand_mocap)
+                body_mocap, hand_mocap,
+                use_cuda=use_cuda
+            )

        # save the obtained body & hand bbox to json file
        if args.save_bbox_output:
@@ -225,7 +230,7 @@ def main():
        args.use_smplx = True

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
-    assert torch.cuda.is_available(), "Current version only supports GPU"
+    use_cuda = device.type == 'cuda'

    hand_bbox_detector = HandBboxDetector('third_view', device)
@@ -240,7 +245,7 @@ def main():
    from renderer.visualizer import Visualizer
    visualizer = Visualizer(args.renderer_type)

-    run_frank_mocap(args, hand_bbox_detector, body_mocap, hand_mocap, visualizer)
+    run_frank_mocap(args, hand_bbox_detector, body_mocap, hand_mocap, visualizer, use_cuda=use_cuda)


if __name__ == '__main__':
diff --git a/demo/demo_handmocap.py b/demo/demo_handmocap.py
index 2262c33..939fec0 100644
--- a/demo/demo_handmocap.py
+++ b/demo/demo_handmocap.py
@@ -156,7 +156,6 @@ def main():
        args.use_smplx = True

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
-    assert torch.cuda.is_available(), "Current version only supports GPU"

    #Set Bbox detector
    bbox_detector = HandBboxDetector(args.view_type, device)
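All three demo entry points now follow the same device-selection pattern instead of asserting on CUDA. Restated outside the diff as a sketch:

```python
import torch

# Prefer the GPU when present, but fall back to the CPU instead of asserting.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
use_cuda = device.type == 'cuda'  # flag threaded through the detectors and regressors
```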
diff --git a/docs/INSTALL.md b/docs/INSTALL.md
index 22cd56e..9b9f6c3 100644
--- a/docs/INSTALL.md
+++ b/docs/INSTALL.md
@@ -3,7 +3,10 @@
 ## Installing All Modules
 - The entire modules can be installed following the instruction below.
-  Note that you may want to install body module only which has fewer dependencies. In this case, you may skip some steps. See below the details.
+  Note that you may want to install body module only which has fewer dependencies. In this case, you may skip some steps. See below the details.
+
+  Also note that the computations can be performed on the CPU, so installing CUDA is not mandatory.
+  Nevertheless, some renderers do require a GPU, so if one renderer does not work for you, please try another.

 - The basic installation
 ```
diff --git a/handmocap/hand_bbox_detector.py b/handmocap/hand_bbox_detector.py
index 2aac9a8..abeb84e 100644
--- a/handmocap/hand_bbox_detector.py
+++ b/handmocap/hand_bbox_detector.py
@@ -42,8 +42,8 @@ class Third_View_Detector(BodyPoseEstimator):
    It combines a body pose estimator (https://github.com/jhugestar/lightweight-human-pose-estimation.pytorch.git)
    with a type-agnostic hand detector (https://github.com/ddshan/hand_detector.d2)
    """
-    def __init__(self):
-        super(Third_View_Detector, self).__init__()
+    def __init__(self, use_cuda=True):
+        super(Third_View_Detector, self).__init__(use_cuda=use_cuda)
        print("Loading Third View Hand Detector")
        self.__load_hand_detector()
@@ -51,6 +51,8 @@ def __init__(self):
    def __load_hand_detector(self):
        # load cfg and model
        cfg = get_cfg()
+        if not self.use_cuda:
+            cfg.MODEL.DEVICE = 'cpu'
        cfg.merge_from_file("detectors/hand_only_detector/faster_rcnn_X_101_32x8d_FPN_3x_100DOH.yaml")
        cfg.MODEL.WEIGHTS = 'extra_data/hand_module/hand_detector/model_0529999.pth' # add model weight here
        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.3 # 0.3 , use low thresh to increase recall
@@ -139,8 +141,8 @@ class Ego_Centric_Detector(BodyPoseEstimator):
    It uses type-aware hand detector:
    (https://github.com/ddshan/hand_object_detector)
    """
-    def __init__(self):
-        super(Ego_Centric_Detector, self).__init__()
+    def __init__(self, use_cuda=True):
+        super(Ego_Centric_Detector, self).__init__(use_cuda=use_cuda)
        print("Loading Ego Centric Hand Detector")
        self.__load_hand_detector()
@@ -158,8 +160,9 @@ def __load_hand_detector(self):
        fasterRCNN.load_state_dict(checkpoint['model'])
        if 'pooling_mode' in checkpoint.keys():
            cfgg.POOLING_MODE = checkpoint['pooling_mode']
-
-        fasterRCNN.cuda()
+
+        if self.use_cuda:
+            fasterRCNN.cuda()
        fasterRCNN.eval()
        self.hand_detector = fasterRCNN
@@ -196,11 +199,12 @@ def __get_image_blob(self, im):
    # part of the code comes from https://github.com/ddshan/hand_object_detector/demo.py
    def __get_raw_hand_bbox(self, img):
        with torch.no_grad():
-            im_data = torch.FloatTensor(1).cuda()
-            im_info = torch.FloatTensor(1).cuda()
-            num_boxes = torch.LongTensor(1).cuda()
-            gt_boxes = torch.FloatTensor(1).cuda()
-            box_info = torch.FloatTensor(1).cuda()
+            # allocate the input buffers unconditionally so they also exist on CPU-only runs
+            im_data = torch.FloatTensor(1)
+            im_info = torch.FloatTensor(1)
+            num_boxes = torch.LongTensor(1)
+            gt_boxes = torch.FloatTensor(1)
+            box_info = torch.FloatTensor(1)
+            if self.use_cuda:
+                im_data = im_data.cuda()
+                im_info = im_info.cuda()
+                num_boxes = num_boxes.cuda()
+                gt_boxes = gt_boxes.cuda()
+                box_info = box_info.cuda()

            im_blob, im_scales = self.__get_image_blob(img)
@@ -231,10 +235,13 @@ def __get_raw_hand_bbox(self, img):
            lr = lr.squeeze(0).float()

            box_deltas = bbox_pred.data
-            stds = [0.1, 0.1, 0.2, 0.2]
-            means = [0.0, 0.0, 0.0, 0.0]
-            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(stds).cuda() \
-                + torch.FloatTensor(means).cuda()
+            stds = torch.FloatTensor([0.1, 0.1, 0.2, 0.2])
+            means = torch.FloatTensor([0.0, 0.0, 0.0, 0.0])
+            if self.use_cuda:
+                # keep stds/means on the same device as box_deltas
+                stds = stds.cuda()
+                means = means.cuda()
+            box_deltas = box_deltas.view(-1, 4) * stds + means

            box_deltas = box_deltas.view(1, -1, 4 * len(self.classes))
            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
@@ -303,12 +310,13 @@ def __init__(self, view_type, device):
        args:
            view_type: third_view or ego_centric.
        """
+        use_cuda = device.type == 'cuda'
        self.view_type = view_type
        if view_type == "ego_centric":
-            self.model = Ego_Centric_Detector()
+            self.model = Ego_Centric_Detector(use_cuda=use_cuda)
        elif view_type == "third_view":
-            self.model = Third_View_Detector()
+            self.model = Third_View_Detector(use_cuda=use_cuda)
        else :
            print("Invalid view_type")
            assert False
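The `cfg.MODEL.DEVICE` override in `Third_View_Detector` above is detectron2's standard switch for keeping a model on the CPU. A minimal sketch, assuming detectron2 is installed:

```python
from detectron2.config import get_cfg

cfg = get_cfg()
cfg.MODEL.DEVICE = 'cpu'  # default is 'cuda'; must be set before the model is built
print(cfg.MODEL.DEVICE)
```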
""" + use_cuda = device.type == 'cuda' self.view_type = view_type if view_type == "ego_centric": - self.model = Ego_Centric_Detector() + self.model = Ego_Centric_Detector(use_cuda=use_cuda) elif view_type == "third_view": - self.model = Third_View_Detector() + self.model = Third_View_Detector(use_cuda=use_cuda) else : print("Invalid view_type") assert False diff --git a/handmocap/hand_mocap_api.py b/handmocap/hand_mocap_api.py index 1567c4f..477a9cb 100644 --- a/handmocap/hand_mocap_api.py +++ b/handmocap/hand_mocap_api.py @@ -41,7 +41,8 @@ def __init__(self, regressor_checkpoint, smpl_dir, device = torch.device('cuda') self.opt.process_rank = -1 # self.opt.which_epoch = str(epoch) - self.model_regressor = H3DWModel(self.opt) + use_cuda = device.type == "cuda" + self.model_regressor = H3DWModel(self.opt, use_cuda=use_cuda) # if there is no specified checkpoint, then skip assert self.model_regressor.success_load, "Specificed checkpoints does not exists: {}".format(self.opt.checkpoint_path) self.model_regressor.eval() diff --git a/handmocap/hand_modules/h3dw_model.py b/handmocap/hand_modules/h3dw_model.py index bdd64b9..0e77e32 100644 --- a/handmocap/hand_modules/h3dw_model.py +++ b/handmocap/hand_modules/h3dw_model.py @@ -71,9 +71,13 @@ class H3DWModel(object): def name(self): return 'H3DWModel' - def __init__(self, opt): + def __init__(self, opt, use_cuda=True): self.opt = opt - self.Tensor = torch.cuda.FloatTensor + self.use_cuda = use_cuda + if use_cuda: + self.Tensor = torch.cuda.FloatTensor + else: + self.Tensor = torch.FloatTensor # set params self.inputSize = opt.inputSize @@ -123,11 +127,17 @@ def __init__(self, opt): gender = 'neutral', num_betas = 10, use_pca = False, - ext='pkl').cuda() + ext='pkl') + if use_cuda: + self.smplx.cuda() # set encoder and optimizer - self.encoder = H3DWEncoder(opt, self.mean_params).cuda() + self.encoder = H3DWEncoder(opt, self.mean_params, use_cuda=use_cuda) + if use_cuda: + self.encoder = self.encoder.cuda() if opt.dist: + if not use_cuda: + raise NotImplementedError("No support for DistributedDataParallel and non-CUDA device") self.encoder = DistributedDataParallel( self.encoder, device_ids=[torch.cuda.current_device()]) @@ -136,11 +146,11 @@ def __init__(self, opt): print(f"Error: {checkpoint_path} does not exists") self.success_load = False else: - if self.opt.dist: + if opt.dist: self.encoder.module.load_state_dict(torch.load( checkpoint_path, map_location=lambda storage, loc: storage.cuda(torch.cuda.current_device()))) else: - saved_weights = torch.load(checkpoint_path) + saved_weights = torch.load(checkpoint_path, map_location=torch.device("cuda" if use_cuda else "cpu")) self.encoder.load_state_dict(saved_weights) self.success_load = True @@ -168,7 +178,9 @@ def load_params(self): self.mean_params.requires_grad = False # define global rotation - self.global_orient = torch.zeros((self.batch_size, 3), dtype=torch.float32).cuda() + self.global_orient = torch.zeros((self.batch_size, 3), dtype=torch.float32) + if self.use_cuda: + self.global_orient = self.global_orient.cuda() # self.global_orient[:, 0] = np.pi self.global_orient.requires_grad = False @@ -190,7 +202,9 @@ def set_input_imgonly(self, input): def get_smplx_output(self, pose_params, shape_params=None): hand_rotation = pose_params[:, :3] hand_pose = pose_params[:, 3:] - body_pose = torch.zeros((self.batch_size, 63)).float().cuda() + body_pose = torch.zeros((self.batch_size, 63)).float() + if self.use_cuda: + body_pose = body_pose.cuda() body_pose[:, 60:] = hand_rotation # set right hand 
diff --git a/handmocap/hand_modules/h3dw_model.py b/handmocap/hand_modules/h3dw_model.py
index bdd64b9..0e77e32 100644
--- a/handmocap/hand_modules/h3dw_model.py
+++ b/handmocap/hand_modules/h3dw_model.py
@@ -71,9 +71,13 @@ class H3DWModel(object):
    def name(self):
        return 'H3DWModel'

-    def __init__(self, opt):
+    def __init__(self, opt, use_cuda=True):
        self.opt = opt
-        self.Tensor = torch.cuda.FloatTensor
+        self.use_cuda = use_cuda
+        if use_cuda:
+            self.Tensor = torch.cuda.FloatTensor
+        else:
+            self.Tensor = torch.FloatTensor

        # set params
        self.inputSize = opt.inputSize
@@ -123,11 +127,17 @@ def __init__(self, opt):
            gender = 'neutral',
            num_betas = 10,
            use_pca = False,
-            ext='pkl').cuda()
+            ext='pkl')
+        if use_cuda:
+            self.smplx.cuda()

        # set encoder and optimizer
-        self.encoder = H3DWEncoder(opt, self.mean_params).cuda()
+        self.encoder = H3DWEncoder(opt, self.mean_params, use_cuda=use_cuda)
+        if use_cuda:
+            self.encoder = self.encoder.cuda()
        if opt.dist:
+            if not use_cuda:
+                raise NotImplementedError("DistributedDataParallel is not supported on a non-CUDA device")
            self.encoder = DistributedDataParallel(
                self.encoder, device_ids=[torch.cuda.current_device()])

@@ -136,11 +146,11 @@ def __init__(self, opt):
            print(f"Error: {checkpoint_path} does not exists")
            self.success_load = False
        else:
-            if self.opt.dist:
+            if opt.dist:
                self.encoder.module.load_state_dict(torch.load(
                    checkpoint_path, map_location=lambda storage, loc: storage.cuda(torch.cuda.current_device())))
            else:
-                saved_weights = torch.load(checkpoint_path)
+                saved_weights = torch.load(checkpoint_path, map_location=torch.device("cuda" if use_cuda else "cpu"))
                self.encoder.load_state_dict(saved_weights)
            self.success_load = True
@@ -168,7 +178,9 @@ def load_params(self):
        self.mean_params.requires_grad = False

        # define global rotation
-        self.global_orient = torch.zeros((self.batch_size, 3), dtype=torch.float32).cuda()
+        self.global_orient = torch.zeros((self.batch_size, 3), dtype=torch.float32)
+        if self.use_cuda:
+            self.global_orient = self.global_orient.cuda()
        # self.global_orient[:, 0] = np.pi
        self.global_orient.requires_grad = False
@@ -190,7 +202,9 @@ def set_input_imgonly(self, input):
    def get_smplx_output(self, pose_params, shape_params=None):
        hand_rotation = pose_params[:, :3]
        hand_pose = pose_params[:, 3:]
-        body_pose = torch.zeros((self.batch_size, 63)).float().cuda()
+        body_pose = torch.zeros((self.batch_size, 63)).float()
+        if self.use_cuda:
+            body_pose = body_pose.cuda()
        body_pose[:, 60:] = hand_rotation # set right hand rotation

        output = self.smplx(
@@ -205,7 +219,7 @@ def get_smplx_output(self, pose_params, shape_params=None):
            hand_type = 'right',
            hand_info = self.hand_info,
            top_finger_joints_type = self.top_finger_joints_type,
-            use_cuda=True)
+            use_cuda=self.use_cuda)

        pred_verts = hand_output['vertices_shift']
        pred_joints_3d = hand_output['hand_joints_shift']
diff --git a/handmocap/hand_modules/h3dw_networks.py b/handmocap/hand_modules/h3dw_networks.py
index 65f912a..25412f8 100644
--- a/handmocap/hand_modules/h3dw_networks.py
+++ b/handmocap/hand_modules/h3dw_networks.py
@@ -45,10 +45,12 @@ def get_model(arch):

class H3DWEncoder(nn.Module):
-    def __init__(self, opt, mean_params):
+    def __init__(self, opt, mean_params, use_cuda=True):
        super(H3DWEncoder, self).__init__()
        self.two_branch = opt.two_branch
-        self.mean_params = mean_params.clone().cuda()
+        self.mean_params = mean_params.clone()
+        if use_cuda:
+            self.mean_params = self.mean_params.cuda()
        self.opt = opt

        relu = nn.ReLU(inplace=False)
diff --git a/integration/copy_and_paste.py b/integration/copy_and_paste.py
index fd2e6e9..f3e5695 100644
--- a/integration/copy_and_paste.py
+++ b/integration/copy_and_paste.py
@@ -91,7 +91,7 @@ def transfer_rotation(
    return return_value


-def integration_copy_paste(pred_body_list, pred_hand_list, smplx_model, image_shape):
+def integration_copy_paste(pred_body_list, pred_hand_list, smplx_model, image_shape, use_cuda=True):
    integral_output_list = list()
    for i in range(len(pred_body_list)):
        body_info = pred_body_list[i]
@@ -100,36 +100,52 @@ def integration_copy_paste(pred_body_list, pred_hand_list, smplx_model, image_sh
            integral_output_list.append(None)
            continue

-        # copy and paste
-        pred_betas = torch.from_numpy(body_info['pred_betas']).cuda()
-        pred_rotmat = torch.from_numpy(body_info['pred_rotmat']).cuda()
+        # copy and paste
+        pred_betas = torch.from_numpy(body_info['pred_betas'])
+        pred_rotmat = torch.from_numpy(body_info['pred_rotmat'])
+        if use_cuda:
+            pred_betas = pred_betas.cuda()
+            pred_rotmat = pred_rotmat.cuda()

        # integrate right hand pose
        hand_output = dict()
        if hand_info is not None and hand_info['right_hand'] is not None:
-            right_hand_pose = torch.from_numpy(hand_info['right_hand']['pred_hand_pose'][:, 3:]).cuda()
-            right_hand_global_orient = torch.from_numpy(hand_info['right_hand']['pred_hand_pose'][:, :3]).cuda()
+            right_hand_pose = torch.from_numpy(hand_info['right_hand']['pred_hand_pose'][:, 3:])
+            right_hand_global_orient = torch.from_numpy(hand_info['right_hand']['pred_hand_pose'][:, :3])
+            if use_cuda:
+                right_hand_pose = right_hand_pose.cuda()
+                right_hand_global_orient = right_hand_global_orient.cuda()
            right_hand_local_orient = transfer_rotation(
                smplx_model, pred_rotmat, right_hand_global_orient, 21)
            pred_rotmat[0, 21] = right_hand_local_orient
        else:
-            right_hand_pose = torch.from_numpy(np.zeros( (1,45) , dtype= np.float32)).cuda()
+            right_hand_pose = torch.from_numpy(np.zeros((1, 45), dtype=np.float32))
+            if use_cuda:
+                # Tensor.cuda() is not in place: its result must be reassigned
+                right_hand_pose = right_hand_pose.cuda()
            right_hand_global_orient = None
            right_hand_local_orient = None

        # integrate left hand pose
        if hand_info is not None and hand_info['left_hand'] is not None:
-            left_hand_pose = torch.from_numpy(hand_info['left_hand']['pred_hand_pose'][:, 3:]).cuda()
-            left_hand_global_orient = torch.from_numpy(hand_info['left_hand']['pred_hand_pose'][:, :3]).cuda()
+            left_hand_pose = torch.from_numpy(hand_info['left_hand']['pred_hand_pose'][:, 3:])
+            left_hand_global_orient = torch.from_numpy(hand_info['left_hand']['pred_hand_pose'][:, :3])
+            if use_cuda:
+                left_hand_pose = left_hand_pose.cuda()
+                left_hand_global_orient = left_hand_global_orient.cuda()
            left_hand_local_orient = transfer_rotation(
                smplx_model, pred_rotmat, left_hand_global_orient, 20)
            pred_rotmat[0, 20] = left_hand_local_orient
        else:
-            left_hand_pose = torch.from_numpy(np.zeros((1,45), dtype= np.float32)).cuda()
+            left_hand_pose = torch.from_numpy(np.zeros((1, 45), dtype=np.float32))
+            if use_cuda:
+                left_hand_pose = left_hand_pose.cuda()
            left_hand_global_orient = None
            left_hand_local_orient = None

-        pred_aa = gu.rotation_matrix_to_angle_axis(pred_rotmat).cuda()
+        pred_aa = gu.rotation_matrix_to_angle_axis(pred_rotmat)
+        if use_cuda:
+            pred_aa = pred_aa.cuda()
        pred_aa = pred_aa.reshape(pred_aa.shape[0], 72)
        smplx_output = smplx_model(
            betas = pred_betas,
@@ -193,16 +209,22 @@ def integration_copy_paste(pred_body_list, pred_hand_list, smplx_model, image_sh

        # keep hand info
        r_hand_local_orient_body = body_info['pred_rotmat'][:, 21] # rot-mat
+        r_hand_local_orient_body_tensor = torch.from_numpy(r_hand_local_orient_body)
+        if use_cuda:
+            r_hand_local_orient_body_tensor = r_hand_local_orient_body_tensor.cuda()
        r_hand_global_orient_body = transfer_rotation(
            smplx_model, pred_rotmat,
-            torch.from_numpy(r_hand_local_orient_body).cuda(),
+            r_hand_local_orient_body_tensor,
            21, 'l2g', 'aa').numpy().reshape(1, 3) # aa
        r_hand_local_orient_body = gu.rotation_matrix_to_angle_axis(r_hand_local_orient_body) # rot-mat -> aa

        l_hand_local_orient_body = body_info['pred_rotmat'][:, 20]
+        l_hand_local_orient_body_tensor = torch.from_numpy(l_hand_local_orient_body)
+        if use_cuda:
+            l_hand_local_orient_body_tensor = l_hand_local_orient_body_tensor.cuda()
        l_hand_global_orient_body = transfer_rotation(
            smplx_model, pred_rotmat,
-            torch.from_numpy(l_hand_local_orient_body).cuda(),
+            l_hand_local_orient_body_tensor,
            20, 'l2g', 'aa').numpy().reshape(1, 3)
        l_hand_local_orient_body = gu.rotation_matrix_to_angle_axis(l_hand_local_orient_body) # rot-mat -> aa
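One pitfall worth flagging for future changes of this kind: `Tensor.cuda()` is not in place, so its result must be reassigned, as in the zero-pose branches of `integration_copy_paste` above. A quick illustration with a toy tensor `t`:

```python
import torch

t = torch.zeros(1, 45)
if torch.cuda.is_available():
    t.cuda()         # no effect on `t`: the CUDA copy is discarded
    print(t.device)  # still cpu
    t = t.cuda()     # rebinding the name keeps the CUDA copy
    print(t.device)  # cuda:0
```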