forked from apache/mxnet
-
Notifications
You must be signed in to change notification settings - Fork 1
/
profiler_utils.py
262 lines (215 loc) · 10.8 KB
/
profiler_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import time
import functools
import numpy as np
from .common_utils import merge_map_list
from mxnet import profiler
"""
TODO: Below we are using logic of parsing the MXNet profiler output string to
fetch the benchmark results. Note that this is a temporary solution till we add
a new utility API into MXNet profiler to get_summary(), reset(). All the below
parsing logic should be removed once these read APIs are available in Profiler.
"""
def _get_memory_profile(memory_profile_results):
memory_profile = {}
for line in memory_profile_results:
if line.startswith("Memory:"):
device_id = line.split()[1]
avg_time_memory_alloc = float(line.split()[-1])
memory_profile["max_storage_mem_alloc_" + device_id] = avg_time_memory_alloc
return memory_profile
def _get_operator_profile(operator_name, operator_profile_results):
operator_profile = {}
# alias map : dictionary of the form {"alias" : "registered_name"}
# allows to retrieve alias operator profile from the profiler results
# TODO handling - "identity" : "_copy"
alias_map = {"broadcast_plus" : "broadcast_add", "broadcast_minus" : "broadcast_sub", "flatten" : "Flatten", "max_axis" : "max",
"swapaxes" : "SwapAxis", "flip" : "reverse", "reshape" : "Reshape", "crop" : "slice", "sum_axis" : "sum", "min_axis" : "min"}
op_name = None
if operator_name in alias_map:
op_name = alias_map[operator_name]
else:
op_name = operator_name
for line in operator_profile_results:
if op_name in line or op_name[:3] + " " in line:
operation = line.split()[0]
operation_avg_time = float(line.split()[-1])
if "_backward" in operation:
operator_profile["avg_time_backward_" + operator_name] = operation_avg_time
else:
operator_profile["avg_time_forward_" + operator_name] = operation_avg_time
return operator_profile
def parse_profiler_dump(operator_name, profiler_dump):
"""Parse the MXNet profiler dump output, fetch Memory profile results and
Operator compute profiler results.
Parameters
----------
profiler_dump: string
MXNet profiler output from mx.profiler.dumps() API.
Returns
-------
map, Memory and Compute profiler results.
"""
if not profiler_dump:
raise AssertionError("Invalid MXNet profiler output provided to parse!")
"""
MXNet profiler output from mx.profiler.dumps() API looks like below. This function parses
this string profiler output to fetch Memory and Compute metrics.
Profile Statistics.
Note that counter items are counter values and not time units.
Device Storage
=================
Name Total Count Time (ms) Min Time (ms) Max Time (ms) Avg Time (ms)
---- ----------- --------- ------------- ------------- -------------
Memory: cpu/0 100 2097152.0000 1681915.8750 2097152.0000 207618.0469
MXNET_C_API
=================
Name Total Count Time (ms) Min Time (ms) Max Time (ms) Avg Time (ms)
---- ----------- --------- ------------- ------------- -------------
MXNDArrayFree 49 1.1220 0.0170 0.0360 0.0229
MXAutogradBackwardEx 50 11.5460 0.1980 0.3360 0.2309
MXNet C API Calls 399 1.9990 1.6010 1.9990 0.1990
MXImperativeInvokeEx 50 4.4810 0.0700 0.1330 0.0896
MXNDArrayWaitAll 50 769.0570 14.0200 24.5030 15.3811
MXAutogradSetIsTraining 100 0.0190 0.0000 0.0010 0.0002
MXAutogradSetIsRecording 100 0.0400 0.0000 0.0010 0.0004
MXNet C API Concurrency 798 0.0000 0.0000 0.0010 0.0005
operator
=================
Name Total Count Time (ms) Min Time (ms) Max Time (ms) Avg Time (ms)
---- ----------- --------- ------------- ------------- -------------
DeleteVariable 196 1.4490 0.0040 0.0250 0.0074
_backward_broadcast_add 100 521.2320 4.8070 8.5970 5.2123
SetValueOp 100 645.8060 5.8820 10.0380 6.4581
broadcast_add 100 394.8910 3.5230 5.8790 3.9489
"""
# String Patterns to look out for when parsing
memory_profile_result_start = "Device Storage" # Helps identify start of Memory profile
c_api_profile_result_start = "MXNET_C_API" # Helps identify end of Memory profile
operator_profile_result_start = "operator" # Helps identify start of Operator profile
memory_profile_results = []
operator_profile_results = []
# Parse lines corresponding to Memory and Computation profiling
read_memory_profile = False
read_operator_profile = False
for line in profiler_dump.splitlines():
if line.startswith(memory_profile_result_start):
read_memory_profile = True
elif line.startswith(operator_profile_result_start):
read_operator_profile = True
elif line.startswith(c_api_profile_result_start):
read_memory_profile = False
if read_memory_profile:
memory_profile_results.append(line)
elif read_operator_profile:
operator_profile_results.append(line)
# Prepare results
memory_profile = _get_memory_profile(memory_profile_results)
operator_profile = _get_operator_profile(operator_name, operator_profile_results)
return merge_map_list([memory_profile, operator_profile])
def cpp_profile(func):
"""Decorator for profiling MXNet operation.
Uses MXNet profiler to collect metrics on memory usage and execution time
of the operation.
Parameters
----------
func:
Operation to be executed and timed.
Returns
-------
res, profiler output. res being result returned after operator execution.
profiler output is a dictionary with summary of operation execution.
Example output : { "add": [{"avg_time_mem_alloc_cpu/0": 207618.0469,
"avg_time_forward_broadcast_add": 4.204,
"avg_time_backward_broadcast_add": 5.6288,
"inputs": {
"lhs": [1024, 1024],
"rhs": [1024,1024]
}]
}
"""
@functools.wraps(func)
def cpp_profile_it(*args, **kwargs):
# Profile the operation
profiler.set_config(profile_all=True, aggregate_stats=True)
profiler.set_state('run')
res = func(*args, **kwargs)
profiler.set_state('stop')
# Prepare the results
profiler_dump = profiler.dumps(reset=True)
# args[0] is assumed to be operator name, if not found check for block name.
# NOTE: This parameter should be removed when we get away from parsing
# profiler output and start using new profiler APIs - get_summary(), reset()
if len(args) > 0:
operator_name = args[0].__name__
elif 'block' in kwargs:
operator_name = kwargs['block']._op_name
else:
raise ValueError("Unable to identify operator name to extract profiler output!")
# Get the MXNet profile output
profiler_output = parse_profiler_dump(operator_name, profiler_dump)
return res, profiler_output
return cpp_profile_it
def python_profile(func):
"""Decorator for profiling MXNet operation.
Uses Python's time module to collect execution time information
of the operation.
Parameters
----------
func:
Operation to be executed and timed.
Returns
-------
res, timing output. res being result returned after operator execution.
profiler output is a dictionary with summary of operation execution.
Example output : { "add": [{"avg_time_add": 0.4053089120425284,
"inputs": {
"lhs": [1024, 1024],
"rhs": [1024,1024]
}]
}
"""
@functools.wraps(func)
def python_profile_it(*args, **kwargs):
runs = args[1]
modified_args = (args[0], 1, args[2])
times = []
for _ in range(runs):
start_time = time.perf_counter() # 1
res = func(*modified_args, **kwargs)
end_time = time.perf_counter() # 2
run_time = (end_time - start_time)*1000 # 3
times.append(run_time)
# NOTE : same as cpp_profile_it
if len(args) > 0:
operator_name = args[0].__name__
elif 'block' in kwargs:
operator_name = kwargs['block']._op_name
else:
raise ValueError("Unable to identify operator name to extract profiler output!")
avg_run_time = np.mean(times)
p50_run_time = np.percentile(times, 50)
p90_run_time = np.percentile(times, 90)
p99_run_time = np.percentile(times, 99)
profiler_output = {'avg_time_'+str(operator_name): avg_run_time,
'p50_'+str(operator_name): p50_run_time,
'p90_'+str(operator_name): p90_run_time,
'p99_'+str(operator_name): p99_run_time,
}
return res, profiler_output
return python_profile_it