Skip to content
This repository has been archived by the owner on Oct 31, 2023. It is now read-only.

CPU support with minimal effort #105

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions bodymocap/body_bbox_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ class BodyPoseEstimator(object):
Hand Detector for third-view input.
It combines a body pose estimator (https://github.com/jhugestar/lightweight-human-pose-estimation.pytorch.git)
"""
def __init__(self):
def __init__(self, use_cuda=True):
self.use_cuda = use_cuda
print("Loading Body Pose Estimator")
self.__load_body_estimator()

Expand All @@ -38,13 +39,14 @@ def __load_body_estimator(self):
checkpoint = torch.load(pose2d_checkpoint, map_location='cpu')
load_state(net, checkpoint)
net = net.eval()
net = net.cuda()
if self.use_cuda:
net = net.cuda()
self.model = net


#Code from https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch/demo.py
def __infer_fast(self, img, input_height_size, stride, upsample_ratio,
cpu=False, pad_value=(0, 0, 0), img_mean=(128, 128, 128), img_scale=1/256):
use_cuda=True, pad_value=(0, 0, 0), img_mean=(128, 128, 128), img_scale=1/256):
height, width, _ = img.shape
scale = input_height_size / height

Expand All @@ -54,7 +56,7 @@ def __infer_fast(self, img, input_height_size, stride, upsample_ratio,
padded_img, pad = pad_width(scaled_img, stride, pad_value, min_dims)

tensor_img = torch.from_numpy(padded_img).permute(2, 0, 1).unsqueeze(0).float()
if not cpu:
if use_cuda:
tensor_img = tensor_img.cuda()

stages_output = self.model(tensor_img)
Expand All @@ -79,8 +81,10 @@ def detect_body_pose(self, img):
orig_img = img.copy()

# forward
heatmaps, pafs, scale, pad = self.__infer_fast(img,
input_height_size=256, stride=stride, upsample_ratio=upsample_ratio)
heatmaps, pafs, scale, pad = self.__infer_fast(
img, input_height_size=256, stride=stride, upsample_ratio=upsample_ratio,
use_cuda=self.use_cuda
)

total_keypoints_num = 0
all_keypoints_by_type = []
Expand Down
6 changes: 3 additions & 3 deletions bodymocap/body_mocap_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
class BodyMocap(object):
def __init__(self, regressor_checkpoint, smpl_dir, device=torch.device('cuda'), use_smplx=False):

self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
self.device = device

# Load parametric model (SMPLX or SMPL)
if use_smplx:
Expand All @@ -36,7 +36,7 @@ def __init__(self, regressor_checkpoint, smpl_dir, device=torch.device('cuda'),
#Load pre-trained neural network
SMPL_MEAN_PARAMS = './extra_data/body_module/data_from_spin/smpl_mean_params.npz'
self.model_regressor = hmr(SMPL_MEAN_PARAMS).to(self.device)
checkpoint = torch.load(regressor_checkpoint)
checkpoint = torch.load(regressor_checkpoint, map_location=device)
self.model_regressor.load_state_dict(checkpoint['model'], strict=False)
self.model_regressor.eval()

Expand Down Expand Up @@ -74,7 +74,7 @@ def regress(self, img_original, body_bbox_list):

#Convert rot_mat to aa since hands are always in aa
# pred_aa = rotmat3x3_to_angle_axis(pred_rotmat)
pred_aa = gu.rotation_matrix_to_angle_axis(pred_rotmat).cuda()
pred_aa = gu.rotation_matrix_to_angle_axis(pred_rotmat).to(self.device)
pred_aa = pred_aa.reshape(pred_aa.shape[0], 72)
smpl_output = self.smpl(
betas=pred_betas,
Expand Down
4 changes: 2 additions & 2 deletions demo/demo_bodymocap.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,10 +153,10 @@ def main():
args = DemoOptions().parse()

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
assert torch.cuda.is_available(), "Current version only supports GPU"
use_cuda = device.type == 'cuda'

# Set bbox detector
body_bbox_detector = BodyPoseEstimator()
body_bbox_detector = BodyPoseEstimator(use_cuda=use_cuda)

# Set mocap regressor
use_smplx = args.use_smplx
Expand Down
17 changes: 11 additions & 6 deletions demo/demo_frankmocap.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ def __filter_bbox_list(body_bbox_list, hand_bbox_list, single_person):
def run_regress(
args, img_original_bgr,
body_bbox_list, hand_bbox_list, bbox_detector,
body_mocap, hand_mocap
body_mocap, hand_mocap,
use_cuda=True
):
cond1 = len(body_bbox_list) > 0 and len(hand_bbox_list) > 0
cond2 = not args.frankmocap_fast_mode
Expand Down Expand Up @@ -102,12 +103,14 @@ def run_regress(

# integration by copy-and-paste
integral_output_list = integration_copy_paste(
pred_body_list, pred_hand_list, body_mocap.smpl, img_original_bgr.shape)
pred_body_list, pred_hand_list, body_mocap.smpl, img_original_bgr.shape,
use_cuda=use_cuda
)

return body_bbox_list, hand_bbox_list, integral_output_list


def run_frank_mocap(args, bbox_detector, body_mocap, hand_mocap, visualizer):
def run_frank_mocap(args, bbox_detector, body_mocap, hand_mocap, visualizer, use_cuda=True):
#Setup input data to handle different types of inputs
input_type, input_data = demo_utils.setup_input(args)

Expand Down Expand Up @@ -176,7 +179,9 @@ def run_frank_mocap(args, bbox_detector, body_mocap, hand_mocap, visualizer):
body_bbox_list, hand_bbox_list, pred_output_list = run_regress(
args, img_original_bgr,
body_bbox_list, hand_bbox_list, bbox_detector,
body_mocap, hand_mocap)
body_mocap, hand_mocap,
use_cuda=use_cuda
)

# save the obtained body & hand bbox to json file
if args.save_bbox_output:
Expand Down Expand Up @@ -225,7 +230,7 @@ def main():
args.use_smplx = True

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
assert torch.cuda.is_available(), "Current version only supports GPU"
use_cuda = device.type == 'cuda'

hand_bbox_detector = HandBboxDetector('third_view', device)

Expand All @@ -240,7 +245,7 @@ def main():
from renderer.visualizer import Visualizer
visualizer = Visualizer(args.renderer_type)

run_frank_mocap(args, hand_bbox_detector, body_mocap, hand_mocap, visualizer)
run_frank_mocap(args, hand_bbox_detector, body_mocap, hand_mocap, visualizer, use_cuda=use_cuda)


if __name__ == '__main__':
Expand Down
1 change: 0 additions & 1 deletion demo/demo_handmocap.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,6 @@ def main():
args.use_smplx = True

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
assert torch.cuda.is_available(), "Current version only supports GPU"

#Set Bbox detector
bbox_detector = HandBboxDetector(args.view_type, device)
Expand Down
5 changes: 4 additions & 1 deletion docs/INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
## Installing All Modules

- All modules can be installed by following the instructions below.
Note that you may want to install only the body module, which has fewer dependencies. In that case, you may skip some steps. See the details below.
Note that you may want to install only the body module, which has fewer dependencies. In that case, you may skip some steps. See the details below.

Also note that all computations can run on the CPU, so installing CUDA is not mandatory.
However, some renderers may require a GPU; if one renderer does not work on your machine, please try another.

- The basic installation
```
Expand Down
42 changes: 25 additions & 17 deletions handmocap/hand_bbox_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,17 @@ class Third_View_Detector(BodyPoseEstimator):
It combines a body pose estimator (https://github.com/jhugestar/lightweight-human-pose-estimation.pytorch.git)
with a type-agnostic hand detector (https://github.com/ddshan/hand_detector.d2)
"""
def __init__(self):
super(Third_View_Detector, self).__init__()
def __init__(self, use_cuda=True):
super(Third_View_Detector, self).__init__(use_cuda=use_cuda)
print("Loading Third View Hand Detector")
self.__load_hand_detector()


def __load_hand_detector(self):
# load cfg and model
cfg = get_cfg()
if not self.use_cuda:
cfg.MODEL.DEVICE = 'cpu'
cfg.merge_from_file("detectors/hand_only_detector/faster_rcnn_X_101_32x8d_FPN_3x_100DOH.yaml")
cfg.MODEL.WEIGHTS = 'extra_data/hand_module/hand_detector/model_0529999.pth' # add model weight here
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.3 # 0.3 , use low thresh to increase recall
Expand Down Expand Up @@ -139,8 +141,8 @@ class Ego_Centric_Detector(BodyPoseEstimator):
It uses type-aware hand detector:
(https://github.com/ddshan/hand_object_detector)
"""
def __init__(self):
super(Ego_Centric_Detector, self).__init__()
def __init__(self, use_cuda=True):
super(Ego_Centric_Detector, self).__init__(use_cuda=use_cuda)
print("Loading Ego Centric Hand Detector")
self.__load_hand_detector()

Expand All @@ -158,8 +160,9 @@ def __load_hand_detector(self):
fasterRCNN.load_state_dict(checkpoint['model'])
if 'pooling_mode' in checkpoint.keys():
cfgg.POOLING_MODE = checkpoint['pooling_mode']

fasterRCNN.cuda()

if self.use_cuda:
fasterRCNN.cuda()
fasterRCNN.eval()
self.hand_detector = fasterRCNN

Expand Down Expand Up @@ -196,11 +199,12 @@ def __get_image_blob(self, im):
# part of the code comes from https://github.com/ddshan/hand_object_detector/demo.py
def __get_raw_hand_bbox(self, img):
with torch.no_grad():
im_data = torch.FloatTensor(1).cuda()
im_info = torch.FloatTensor(1).cuda()
num_boxes = torch.LongTensor(1).cuda()
gt_boxes = torch.FloatTensor(1).cuda()
box_info = torch.FloatTensor(1).cuda()
if self.use_cuda:
im_data = torch.FloatTensor(1).cuda()
im_info = torch.FloatTensor(1).cuda()
num_boxes = torch.LongTensor(1).cuda()
gt_boxes = torch.FloatTensor(1).cuda()
box_info = torch.FloatTensor(1).cuda()

im_blob, im_scales = self.__get_image_blob(img)

Expand Down Expand Up @@ -231,10 +235,13 @@ def __get_raw_hand_bbox(self, img):
lr = lr.squeeze(0).float()

box_deltas = bbox_pred.data
stds = [0.1, 0.1, 0.2, 0.2]
means = [0.0, 0.0, 0.0, 0.0]
box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(stds).cuda() \
+ torch.FloatTensor(means).cuda()
stds = torch.FloatTensor([0.1, 0.1, 0.2, 0.2])
means = torch.FloatTensor([0.0, 0.0, 0.0, 0.0])
if self.use_cuda:
stds = stds.cuda()
means = stds.cuda()
box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(stds) \
+ torch.FloatTensor(means)
box_deltas = box_deltas.view(1, -1, 4 * len(self.classes))

pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
Expand Down Expand Up @@ -303,12 +310,13 @@ def __init__(self, view_type, device):
args:
view_type: third_view or ego_centric.
"""
use_cuda = device.type == 'cuda'
self.view_type = view_type

if view_type == "ego_centric":
self.model = Ego_Centric_Detector()
self.model = Ego_Centric_Detector(use_cuda=use_cuda)
elif view_type == "third_view":
self.model = Third_View_Detector()
self.model = Third_View_Detector(use_cuda=use_cuda)
else :
print("Invalid view_type")
assert False
Expand Down
3 changes: 2 additions & 1 deletion handmocap/hand_mocap_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ def __init__(self, regressor_checkpoint, smpl_dir, device = torch.device('cuda')
self.opt.process_rank = -1

# self.opt.which_epoch = str(epoch)
self.model_regressor = H3DWModel(self.opt)
use_cuda = device.type == "cuda"
self.model_regressor = H3DWModel(self.opt, use_cuda=use_cuda)
# if there is no specified checkpoint, then skip
assert self.model_regressor.success_load, "Specificed checkpoints does not exists: {}".format(self.opt.checkpoint_path)
self.model_regressor.eval()
Expand Down
32 changes: 23 additions & 9 deletions handmocap/hand_modules/h3dw_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,13 @@ class H3DWModel(object):
def name(self):
return 'H3DWModel'

def __init__(self, opt):
def __init__(self, opt, use_cuda=True):
self.opt = opt
self.Tensor = torch.cuda.FloatTensor
self.use_cuda = use_cuda
if use_cuda:
self.Tensor = torch.cuda.FloatTensor
else:
self.Tensor = torch.FloatTensor

# set params
self.inputSize = opt.inputSize
Expand Down Expand Up @@ -123,11 +127,17 @@ def __init__(self, opt):
gender = 'neutral',
num_betas = 10,
use_pca = False,
ext='pkl').cuda()
ext='pkl')
if use_cuda:
self.smplx.cuda()

# set encoder and optimizer
self.encoder = H3DWEncoder(opt, self.mean_params).cuda()
self.encoder = H3DWEncoder(opt, self.mean_params, use_cuda=use_cuda)
if use_cuda:
self.encoder = self.encoder.cuda()
if opt.dist:
if not use_cuda:
raise NotImplementedError("No support for DistributedDataParallel and non-CUDA device")
self.encoder = DistributedDataParallel(
self.encoder, device_ids=[torch.cuda.current_device()])

Expand All @@ -136,11 +146,11 @@ def __init__(self, opt):
print(f"Error: {checkpoint_path} does not exists")
self.success_load = False
else:
if self.opt.dist:
if opt.dist:
self.encoder.module.load_state_dict(torch.load(
checkpoint_path, map_location=lambda storage, loc: storage.cuda(torch.cuda.current_device())))
else:
saved_weights = torch.load(checkpoint_path)
saved_weights = torch.load(checkpoint_path, map_location=torch.device("cuda" if use_cuda else "cpu"))
self.encoder.load_state_dict(saved_weights)
self.success_load = True

Expand Down Expand Up @@ -168,7 +178,9 @@ def load_params(self):
self.mean_params.requires_grad = False

# define global rotation
self.global_orient = torch.zeros((self.batch_size, 3), dtype=torch.float32).cuda()
self.global_orient = torch.zeros((self.batch_size, 3), dtype=torch.float32)
if self.use_cuda:
self.global_orient = self.global_orient.cuda()
# self.global_orient[:, 0] = np.pi
self.global_orient.requires_grad = False

Expand All @@ -190,7 +202,9 @@ def set_input_imgonly(self, input):
def get_smplx_output(self, pose_params, shape_params=None):
hand_rotation = pose_params[:, :3]
hand_pose = pose_params[:, 3:]
body_pose = torch.zeros((self.batch_size, 63)).float().cuda()
body_pose = torch.zeros((self.batch_size, 63)).float()
if self.use_cuda:
body_pose = body_pose.cuda()
body_pose[:, 60:] = hand_rotation # set right hand rotation

output = self.smplx(
Expand All @@ -205,7 +219,7 @@ def get_smplx_output(self, pose_params, shape_params=None):
hand_type = 'right',
hand_info = self.hand_info,
top_finger_joints_type = self.top_finger_joints_type,
use_cuda=True)
use_cuda=self.use_cuda)

pred_verts = hand_output['vertices_shift']
pred_joints_3d = hand_output['hand_joints_shift']
Expand Down
6 changes: 4 additions & 2 deletions handmocap/hand_modules/h3dw_networks.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,12 @@ def get_model(arch):


class H3DWEncoder(nn.Module):
def __init__(self, opt, mean_params):
def __init__(self, opt, mean_params, use_cuda=True):
super(H3DWEncoder, self).__init__()
self.two_branch = opt.two_branch
self.mean_params = mean_params.clone().cuda()
self.mean_params = mean_params.clone()
if use_cuda:
self.mean_params = self.mean_params.cuda()
self.opt = opt

relu = nn.ReLU(inplace=False)
Expand Down
Loading