[cherry-pick] Refine user experience for profiler (#41989)
* fix divide zero error when cpu only (#41794)

* reduce performance influence by RecordEvent in Python (#41822)

* reduce performance influence

* add unit test

* fix

* Rebase for profiler statistic ratio (#41939)

* fix according to suggestion

* add kernel summary

* improve coverage
rainyfly authored Apr 20, 2022
1 parent 85a4ecb commit 2ea5e02
Showing 5 changed files with 287 additions and 45 deletions.
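
The center of the "reduce performance influence by RecordEvent in Python" item is a module-level flag, `_is_profiler_used`, in `paddle.profiler.utils`: the `profiler.py` hunks below set it in `Profiler.start()` and clear it in `Profiler.stop()`, so Python-side record events can bail out cheaply when no profiler is running. The `utils.py` diff itself is not rendered on this page, so the following is only a minimal sketch of the gating pattern under that assumption, not Paddle's actual implementation:

# Sketch of the flag-gating pattern (assumed shape of the utils.py change,
# which is not shown in this diff).
_is_profiler_used = False   # Profiler.start() sets True, Profiler.stop() resets False
_event_stack = []           # stand-in for the real tracer


class GatedRecordEvent:
    """Context manager that records an event only while profiling is active."""

    def __init__(self, name):
        self.name = name
        self._began = False

    def __enter__(self):
        if not _is_profiler_used:
            return self                  # fast path: no profiler, do nothing
        self._began = True
        _event_stack.append(self.name)   # placeholder for the real tracer push
        return self

    def __exit__(self, exc_type, exc, tb):
        if self._began:
            _event_stack.pop()           # placeholder for the real tracer pop

The unit-test diff right below asserts exactly this flag lifecycle: False outside a `Profiler` context, True inside it.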
7 changes: 7 additions & 0 deletions python/paddle/fluid/tests/unittests/test_newprofiler.py
@@ -20,6 +20,7 @@

import paddle
import paddle.profiler as profiler
import paddle.profiler.utils as utils
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.io import Dataset, DataLoader
@@ -40,11 +41,17 @@ def my_trace_back(prof):
with profiler.Profiler(targets=[profiler.ProfilerTarget.CPU], ) as prof:
y = x / 2.0
prof = None
self.assertEqual(utils._is_profiler_used, False)
with profiler.RecordEvent(name='test'):
y = x / 2.0

with profiler.Profiler(
targets=[profiler.ProfilerTarget.CPU],
scheduler=(1, 2)) as prof:
self.assertEqual(utils._is_profiler_used, True)
with profiler.RecordEvent(name='test'):
y = x / 2.0

prof = None
with profiler.Profiler(
targets=[profiler.ProfilerTarget.CPU],
102 changes: 92 additions & 10 deletions python/paddle/fluid/tests/unittests/test_profiler_statistic.py
@@ -51,8 +51,9 @@ def test_statistic_case1(self):
profilerstep_node = HostPythonNode('ProfileStep#1',
profiler.TracerEventType.ProfileStep,
0, 400, 1000, 1001)
-dataloader_node = HostPythonNode(
-    'Dataloader', profiler.TracerEventType.Forward, 5, 15, 1000, 1001)
+dataloader_node = HostPythonNode('Dataloader',
+                                 profiler.TracerEventType.Dataloader, 5,
+                                 15, 1000, 1001)
mobilenet_node = HostPythonNode(
'MobileNet', profiler.TracerEventType.Forward, 20, 50, 1000, 1001)
yolonet_node = HostPythonNode(
@@ -155,7 +156,7 @@ def test_statistic_case1(self):
profiler.TracerEventType.ProfileStep), 400)
self.assertEqual(
time_range_summary.get_cpu_range_sum(
-profiler.TracerEventType.Forward), 100)
+profiler.TracerEventType.Forward), 90)
self.assertEqual(
time_range_summary.get_cpu_range_sum(
profiler.TracerEventType.Backward), 80)
@@ -185,12 +186,12 @@
profiler.TracerEventType.Communication), 5)
self.assertEqual(len(event_summary.items), 2)
self.assertEqual(len(event_summary.userdefined_items), 1)
-self.assertEqual(len(event_summary.model_perspective_items), 4)
+self.assertEqual(len(event_summary.model_perspective_items), 5)
self.assertEqual(len(event_summary.memory_manipulation_items), 1)
self.assertEqual(event_summary.items['conv2d'].cpu_time, 15)
self.assertEqual(event_summary.items['conv2d'].general_gpu_time, 25)
self.assertEqual(
-event_summary.model_perspective_items['Forward'].cpu_time, 100)
+event_summary.model_perspective_items['Forward'].cpu_time, 90)
self.assertEqual(
event_summary.model_perspective_items['Forward'].general_gpu_time,
135)
@@ -217,8 +218,9 @@ def test_statistic_case2(self):
profiler.TracerEventType.ProfileStep,
0, 400, 1000, 1001)

-dataloader_node = HostPythonNode(
-    'Dataloader', profiler.TracerEventType.Forward, 5, 15, 1000, 1001)
+dataloader_node = HostPythonNode('Dataloader',
+                                 profiler.TracerEventType.Dataloader, 5,
+                                 15, 1000, 1001)

mobilenet_node = HostPythonNode(
'MobileNet', profiler.TracerEventType.Forward, 20, 50, 1000, 1001)
@@ -372,7 +374,7 @@ def test_statistic_case2(self):
profiler.TracerEventType.ProfileStep), 400)
self.assertEqual(
time_range_summary.get_cpu_range_sum(
-profiler.TracerEventType.Forward), 100)
+profiler.TracerEventType.Forward), 90)
self.assertEqual(
time_range_summary.get_cpu_range_sum(
profiler.TracerEventType.Backward), 80)
@@ -417,12 +419,12 @@ def test_statistic_case2(self):
distributed_summary.overlap_range), 85)
self.assertEqual(len(event_summary.items), 4)
self.assertEqual(len(event_summary.userdefined_items), 1)
-self.assertEqual(len(event_summary.model_perspective_items), 4)
+self.assertEqual(len(event_summary.model_perspective_items), 5)
self.assertEqual(len(event_summary.memory_manipulation_items), 1)
self.assertEqual(event_summary.items['conv2d'].cpu_time, 15)
self.assertEqual(event_summary.items['conv2d'].general_gpu_time, 25)
self.assertEqual(
-event_summary.model_perspective_items['Forward'].cpu_time, 100)
+event_summary.model_perspective_items['Forward'].cpu_time, 90)
self.assertEqual(
event_summary.model_perspective_items['Forward'].general_gpu_time,
315)
@@ -441,6 +443,86 @@ def test_statistic_case2(self):
thread_sep=False,
time_unit='ms'))

def test_statistic_case3(self):
# for coverage, test all time is 0
root_node = HostPythonNode('Root Node',
profiler.TracerEventType.UserDefined, 0,
float('inf'), 1000, 1001)
profilerstep_node = HostPythonNode('ProfileStep#1',
profiler.TracerEventType.ProfileStep,
0, 400, 1000, 1001)
dataloader_node = HostPythonNode('Dataloader',
profiler.TracerEventType.Dataloader, 5,
15, 1000, 1001)
mobilenet_node = HostPythonNode(
'MobileNet', profiler.TracerEventType.Forward, 20, 50, 1000, 1001)

backward_node = HostPythonNode('Gradient Backward',
profiler.TracerEventType.Backward, 120,
200, 1000, 1001)
optimization_node = HostPythonNode(
'Optimization', profiler.TracerEventType.Optimization, 220, 300,
1000, 1001)
userdefined_node = HostPythonNode('Communication Time',
profiler.TracerEventType.UserDefined,
60, 70, 1000, 1001)

conv2d_node = HostPythonNode(
'conv2d', profiler.TracerEventType.Operator, 25, 25, 1000, 1001)

conv2d_infer_shape = HostPythonNode(
'conv2d::infer_shape', profiler.TracerEventType.OperatorInner, 25,
25, 1000, 1001)
conv2d_compute = HostPythonNode('conv2d::compute',
profiler.TracerEventType.OperatorInner,
25, 25, 1000, 1001)
conv2d_launchkernel = HostPythonNode(
'cudalaunchkernel', profiler.TracerEventType.CudaRuntime, 25, 25,
1000, 1001)

conv2d_kernel = DevicePythonNode(
'conv2d_kernel', profiler.TracerEventType.Kernel, 35, 35, 0, 0, 0)
another_kernel = DevicePythonNode(
'void phi::funcs::VectorizedBroadcastKernel<float, float, phi::funcs::AddFunctor<float>, phi::funcs::AddFunctor<float>>()',
profiler.TracerEventType.Kernel, 35, 35, 0, 0, 0)
root_node.children_node.append(profilerstep_node)
profilerstep_node.children_node.extend([
dataloader_node, mobilenet_node, userdefined_node, backward_node,
optimization_node
])
mobilenet_node.children_node.append(conv2d_node)
conv2d_node.children_node.extend([conv2d_infer_shape, conv2d_compute])
conv2d_compute.runtime_node.append(conv2d_launchkernel)
conv2d_launchkernel.device_node.append(conv2d_kernel)
conv2d_launchkernel.device_node.append(another_kernel)
thread_tree = {'thread1001': root_node}
extra_info = {
'Process Cpu Utilization': '1.02',
'System Cpu Utilization': '0.68'
}
statistic_data = profiler.profiler_statistic.StatisticData(thread_tree,
extra_info)
time_range_summary = statistic_data.time_range_summary
event_summary = statistic_data.event_summary

self.assertEqual(event_summary.items['conv2d'].cpu_time, 0)
self.assertEqual(event_summary.items['conv2d'].general_gpu_time, 0)
self.assertEqual(event_summary.userdefined_items['Communication Time']
.general_gpu_time, 0)
for sort_key in [
profiler.SortedKeys.CPUTotal, profiler.SortedKeys.CPUMax,
profiler.SortedKeys.CPUMin, profiler.SortedKeys.CPUAvg,
profiler.SortedKeys.GPUTotal, profiler.SortedKeys.GPUMax,
profiler.SortedKeys.GPUMin, profiler.SortedKeys.GPUAvg
]:
print(
profiler.profiler_statistic._build_table(
statistic_data,
sorted_by=sort_key,
op_detail=True,
thread_sep=False,
time_unit='ms'))


if __name__ == '__main__':
unittest.main()
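
test_statistic_case3 feeds the statistics code a trace in which every duration is zero, which is the situation behind "fix divide zero error when cpu only", and its two DevicePythonNode kernels exercise the new kernel summary. The diff of profiler_statistic.py is not rendered on this page, so the following is only a hypothetical sketch of the two ideas the test covers: group device kernel events by name and guard every ratio so an all-zero (e.g. CPU-only) run cannot divide by zero. The helper name and the start_ns/end_ns attribute names are assumptions for illustration.

from collections import defaultdict


def summarize_kernels(kernel_nodes):
    """Hypothetical kernel summary: name -> (calls, total, avg, ratio)."""
    buckets = defaultdict(lambda: [0, 0.0])   # name -> [call count, total time]
    for node in kernel_nodes:                 # DevicePythonNode-like objects
        buckets[node.name][0] += 1
        buckets[node.name][1] += node.end_ns - node.start_ns  # assumed attrs

    grand_total = sum(total for _, total in buckets.values())
    summary = {}
    for name, (calls, total) in buckets.items():
        avg = total / calls if calls else 0.0
        # The guard matters for case3: every duration is 0, so grand_total is 0
        # and an unguarded division would raise ZeroDivisionError.
        ratio = total / grand_total if grand_total > 0 else 0.0
        summary[name] = (calls, total, avg, ratio)
    return summary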
3 changes: 3 additions & 0 deletions python/paddle/profiler/profiler.py
@@ -27,6 +27,7 @@

from .utils import RecordEvent, wrap_optimizers
from .profiler_statistic import StatisticData, _build_table, SortedKeys
from paddle.profiler import utils
from .timer import benchmark


@@ -475,6 +476,7 @@ def start(self):
if self.timer_only:
return
# CLOSED -> self.current_state
utils._is_profiler_used = True
if self.current_state == ProfilerState.READY:
self.profiler.prepare()
elif self.current_state == ProfilerState.RECORD:
@@ -527,6 +529,7 @@ def stop(self):
self.profiler_result = self.profiler.stop()
if self.on_trace_ready:
self.on_trace_ready(self)
utils._is_profiler_used = False

def step(self, num_samples: Optional[int]=None):
r"""
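
Taken together with the test changes above, the two new assignments in start() and stop() give RecordEvent a cheap way to tell whether tracing is active. A usage sketch mirroring test_newprofiler.py (the input tensor and the assumption that the flag starts out False are for illustration only):

import paddle
import paddle.profiler as profiler
import paddle.profiler.utils as utils

x = paddle.randn([4, 4])   # any tensor; the unit test builds its own input

assert utils._is_profiler_used == False   # assumed initial value
with profiler.RecordEvent(name='outside'):
    y = x / 2.0            # flag is False, so recording is skipped

with profiler.Profiler(targets=[profiler.ProfilerTarget.CPU],
                       scheduler=(1, 2)) as prof:
    # Profiler.start() has just set the flag.
    assert utils._is_profiler_used == True
    with profiler.RecordEvent(name='inside'):
        y = x / 2.0        # recorded while the profiler is active

# Profiler.stop() cleared the flag when the context exited.
assert utils._is_profiler_used == False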

