-
Notifications
You must be signed in to change notification settings - Fork 10
/
profile.py
109 lines (95 loc) · 3.29 KB
/
profile.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# --------------------------------------------------------
# Copyright (C) 2022 NVIDIA Corporation. All rights reserved.
#
# Official PyTorch implementation of NeurIPS2022 paper
# Structural Pruning via Latency-Saliency Knapsack
# Maying Shen, Hongxu Yin, Pavlo Molchanov, Lei Mao, Jianna Liu and Jose M. Alvarez
#
# This work is licensed under the NVIDIA Source Code License
# To view a copy of this license, see the LICENSE file.
# --------------------------------------------------------
import argparse
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import yaml
from models import get_model
from utils.model_summary import model_summary
from utils.utils import ExpConfig
# Command-line interface for the profiling script. `args` is read at module
# level by main(), so its name is part of this file's internal contract.
arg_parser = argparse.ArgumentParser(
    description="Main script for running HALP algorithm."
)
# Experiment YAML describing architecture, dataset, batch size, etc.
arg_parser.add_argument(
    "--exp",
    type=str,
    default="configs/exp_configs/rn50_imagenet_baseline.yaml",
    help="Config file for the experiment.",
)
# Optional checkpoint to restore before profiling.
arg_parser.add_argument(
    "--model_path",
    type=str,
    default=None,
    help="The path of the model.",
)
# Pruning mask produced by the HALP algorithm (mandatory).
arg_parser.add_argument(
    "--mask_path",
    type=str,
    required=True,
    help="The path of the mask file.",
)
# Batch size used for the timed forward passes.
arg_parser.add_argument(
    "--batch_size",
    type=int,
    default=256,
    help="The batch size of inference.",
)
args = arg_parser.parse_args()
# Square input resolution per supported dataset (used for both the timed
# batch and the batch-size-1 FLOPs measurement).
_INPUT_RESOLUTIONS = {
    "imagenet": 224,
    "cifar10": 32,
}


def _input_resolution(dataset_name):
    """Return the square input resolution for *dataset_name*.

    Raises:
        NotImplementedError: if the dataset is not supported.
    """
    try:
        return _INPUT_RESOLUTIONS[dataset_name.lower()]
    except KeyError:
        raise NotImplementedError


def main():
    """Profile inference latency and MACs of a (pruned) model.

    Loads the experiment config from ``args.exp``, builds the model with the
    pruning mask from ``args.mask_path``, optionally restores the checkpoint
    at ``args.model_path``, then times forward passes with CUDA events and
    prints ms/image, FPS, and GMACs (measured at batch size 1).
    """
    with open(args.exp) as f:
        # NOTE(review): FullLoader can construct arbitrary Python objects for
        # some tags — acceptable for trusted local config files only.
        cfg = yaml.load(f, Loader=yaml.FullLoader)
    exp_cfg = ExpConfig(cfg)
    exp_cfg.override_config(vars(args))

    cudnn.benchmark = True
    cudnn.deterministic = True
    torch.set_grad_enabled(False)  # inference only; skip autograd bookkeeping

    model = get_model(
        exp_cfg.arch, exp_cfg.class_num, exp_cfg.enable_bias,
        group_mask_file=args.mask_path,
    )
    if args.model_path is not None:
        resume_ckpt = torch.load(args.model_path, map_location="cpu")
        # Checkpoints may wrap weights under "state_dict" and prefix keys
        # with "module." when saved from DataParallel / DistributedDataParallel.
        state_dict = resume_ckpt["state_dict"] if "state_dict" in resume_ckpt else resume_ckpt
        model.load_state_dict(
            {k.replace("module.", ""): v for k, v in state_dict.items()}
        )

    device = torch.device(0)  # hard-coded to GPU 0, as in the original script
    model.eval()
    model.to(device)

    resolution = _input_resolution(exp_cfg.dataset_name)
    # Move the batch to the GPU once, outside the timing loop (the copy is
    # loop-invariant and should not be re-issued every iteration).
    inputs = torch.randn(exp_cfg.batch_size, 3, resolution, resolution).to(device)

    start_evt = torch.cuda.Event(enable_timing=True)
    end_evt = torch.cuda.Event(enable_timing=True)
    total_iters, warmup_iters = 40, 10
    times = []
    for i in range(total_iters):
        start_evt.record()
        model(inputs)
        end_evt.record()
        torch.cuda.synchronize()  # events must complete before elapsed_time
        if i < warmup_iters:
            continue  # discard warmup iterations (cudnn autotune, caches)
        times.append(start_evt.elapsed_time(end_evt))

    print("Infer time (ms/image)", np.mean(times) / exp_cfg.batch_size)
    print("FPS:", exp_cfg.batch_size * 1e+3 / np.mean(times))

    # MACs are measured at batch size 1, on the same device as the model.
    flops = model_summary(model, torch.randn(1, 3, resolution, resolution).to(device))
    print('MACs(G): {:.3f}'.format(flops / 1e9))
# Script entry point: run the latency / FLOPs profiling.
if __name__ == "__main__":
    main()