-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinference_time.py
97 lines (69 loc) · 2.33 KB
/
inference_time.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import os
import time

import numpy as np
import torch
from torch.utils.data import DataLoader

import config
from models import my_model, densenet121, comp_resnet50, comp_dense, densenet_3stage
# Benchmark single-image inference latency of three pose-regression models:
# a pruned ResNet-50, a pruned DenseNet, and a 3-stage DenseNet.
# Reports mean (and std) forward-pass time per image in milliseconds.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

# compress_rate holds per-layer pruning ratios; 0 and 0.35 presumably match
# the configurations these models were trained with — TODO confirm.
compress_rate = [0] * 21
model1 = comp_resnet50(config.SMPL_MEAN_PARAMS, compress_rate)
model1.eval()
model1.to(device)

compress_rate = [[0.], [0.] * 6, [0.] * 12, [0.] * 24, [0.] * 16]
model2 = comp_dense(config.SMPL_MEAN_PARAMS, compress_rate)
model2.eval()
model2.to(device)

compress_rate = [[0.35], [0.35] * 6, [0.35] * 12, [0.35] * 24, [0.35] * 16]
model3 = densenet_3stage(config.SMPL_MEAN_PARAMS, compress_rate)
model3.eval()
model3.to(device)

batch_size = 1
dummy_input = torch.randn(batch_size, 3, 224, 224, dtype=torch.float).to(device)

repetitions = 500
images = repetitions * batch_size
# The original used torch.cuda.Event unconditionally, which raises on the
# CPU fallback path selected above. Time with CUDA events on GPU (correct
# for async kernel launches) and time.perf_counter on CPU.
use_cuda_events = device.type == 'cuda'


def _time_forward(model):
    """Run one forward pass on dummy_input; return elapsed time in ms."""
    if use_cuda_events:
        starter = torch.cuda.Event(enable_timing=True)
        ender = torch.cuda.Event(enable_timing=True)
        starter.record()
        _ = model(dummy_input)
        ender.record()
        # WAIT FOR GPU SYNC — kernels launch asynchronously; sync before
        # reading the event timer or the measurement is meaningless.
        torch.cuda.synchronize()
        return starter.elapsed_time(ender)
    t0 = time.perf_counter()
    _ = model(dummy_input)
    return (time.perf_counter() - t0) * 1000.0  # seconds -> ms, to match event units


timings1 = np.zeros((repetitions, 1))
timings2 = np.zeros((repetitions, 1))
timings3 = np.zeros((repetitions, 1))

# Warm-up AND measurement both run under no_grad: the original warm-up ran
# outside it, building autograd graphs for 100 iterations per model.
with torch.no_grad():
    # GPU-WARM-UP: first launches pay one-time kernel/caching costs that
    # must not pollute the measured timings.
    for _ in range(100):
        _ = model1(dummy_input)
        _ = model2(dummy_input)
        _ = model3(dummy_input)
    # MEASURE PERFORMANCE
    for rep in range(repetitions):
        timings1[rep] = _time_forward(model1)
        timings2[rep] = _time_forward(model2)
        timings3[rep] = _time_forward(model3)

mean_syn1 = np.sum(timings1) / images
std_syn1 = np.std(timings1)
mean_syn2 = np.sum(timings2) / images
std_syn2 = np.std(timings2)
mean_syn3 = np.sum(timings3) / images
std_syn3 = np.std(timings3)
print('batch size : ', batch_size)
print('model1, mean time per image : ', mean_syn1)
print('model2, mean time per image : ', mean_syn2)
print('model3, mean time per image : ', mean_syn3)
# The stds were computed but never reported in the original; print them too.
print('model1, std : ', std_syn1)
print('model2, std : ', std_syn2)
print('model3, std : ', std_syn3)