Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve the performance and scalability of pod viewer #2254

Merged
merged 7 commits into from
May 24, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,6 @@
<paper-card id="card" heading="[[_name]]" hidden="[[!_name]]" elevation="2">
<template is="dom-repeat" items=[[nodes]] as="node">
<div class="card-content info">
<div hidden="[[!_isChannel(node)]]">
<p>Replica Id: <span class="value">[[node.replicaId]]</span></p>
</div>
<div hidden="[[_isStep(node)]]">
<p>Data Transferred: <span class="value">[[_sizeMiB(node.dataSize)]] MiB</span></p>
<p>Latency: <span class="value">[[_format(node.durationUs)]] µs</span></p>
Expand All @@ -68,8 +65,6 @@
</div>
<div hidden="[[!_isChannel(node)]]">
<p>Send Delay: <span class="value">[[_format(node.sendDelayUs)]] µs</span></p>
<p>From: <span class="value">Chip[[_chipId(node.srcCoreId)]], Core[[_nodeId(node.srcCoreId)]]</span></p>
<p>To: <span class="value">Chip[[_chipId(node.dstCoreId)]], Core[[_nodeId(node.dstCoreId)]]</span></p>
<p>Hlo Names: </p>
<code class="code-style">
<template is="dom-repeat" items=[[node.hloNames]]>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ Polymer({
{key: 'lowFlopsComputeUs', label: 'Low flops compute'},
{key: 'hostInfeedDurationUs', label: 'Infeed'},
{key: 'hostOutfeedDurationUs', label: 'Outfeed'},
{key: 'crsDurationUs', label: 'All reduce'},
{key: 'allReduceComputeDurationUs', label: 'AllReduce compute'},
{key: 'allReduceSyncDurationUs', label: 'AllReduce sync'},
{key: 'sendDurationUs', label: 'Send'},
{key: 'recvDurationUs', label: 'Recv'},
],
Expand Down Expand Up @@ -114,15 +115,15 @@ Polymer({
function(node: undefined|podviewer.proto.PodStatsRecord,
key: undefined|string): string|undefined {
if (!key || !node) return;
return this._format(node[key]);
return this._format(node[key] ? node[key] : 0);
},
/**
* Returns the percentage of the total duration taken by a specific breakdown.
*/
_getStepBreakdownPct:
function(node: undefined|podviewer.proto.PodStatsRecord,
key: undefined|string): string|undefined {
if (!key || !node || !node.totalDurationUs) return;
if (!key || !node || !node.totalDurationUs || !node[key]) return;
return (node[key] / node.totalDurationUs * 100).toFixed(2) + '%';
},
});
Expand Down
19 changes: 8 additions & 11 deletions tensorboard/plugins/profile/pod_viewer/pod_viewer_common/proto.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,10 @@ module podviewer.proto {
sendDurationUs: number;
/** The time spent on recv operations. */
recvDurationUs: number;
/**
* The time spent on all-reduce in micro-seconds
* (used to be cross-replica-sum).
*/
crsDurationUs: number;
/** The time spent on actual all-reduce compute in micro-seconds. */
allReduceComputeDurationUs: number;
/** The time spent on all-reduce synchronization in micro-seconds. */
allReduceSyncDurationUs: number;
/** The bottleneck among the above-mentioned metrics. */
bottleneck: string;
}
Expand Down Expand Up @@ -135,10 +134,10 @@ module podviewer.proto {
export interface ChannelInfo {
/** Id of the channel. */
channelId: number;
/** Core id of the send op. */
srcCoreId: number;
/** Core id of the recv op. */
dstCoreId: number;
/** Core ids of the send ops. */
srcCoreIds: Array<number>;
/** Core ids of the recv ops. */
dstCoreIds: Array<number>;
/** Byte size of the data transferred. */
dataSize: number;
/**
Expand All @@ -158,8 +157,6 @@ module podviewer.proto {
* op, the delay is zero.
*/
sendDelayUs: number;
/** The replica_id of the program executing the send and recv ops. */
replicaId: number;
}

/** Data input to the pod viewer tool. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@
<div id="topo-graph">
<topology-graph run-environment=[[_runEnvironment]]
data=[[_podStatsMap]] metrics=[[_stepBreakdownLayers]]
active-bar=[[activeBar]] selected-channel={{selectedChannel}}>
active-bar=[[activeBar]]>
</topology-graph>
</div>
<div id="channel-bars" class="bar-chart" hidden="[[!_channelDb]]">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,6 @@ Polymer({
type: Array,
notify: true,
},
selectedChannel: {
type: Array,
notify: true,
observer: '_selectedChannelChanged',
},
activeBar: {
type: Object,
notify: true,
Expand Down Expand Up @@ -66,7 +61,8 @@ Polymer({
{key: 'lowFlopsComputeUs', label: 'Low flops compute'},
{key: 'hostInfeedDurationUs', label: 'Infeed'},
{key: 'hostOutfeedDurationUs', label: 'Outfeed'},
{key: 'crsDurationUs', label: 'All reduce'},
{key: 'allReduceComputeDurationUs', label: 'AllReduce compute'},
{key: 'allReduceSyncDurationUs', label: 'AllReduce sync'},
{key: 'sendDurationUs', label: 'Send'},
{key: 'recvDurationUs', label: 'Recv'},
],
Expand Down Expand Up @@ -129,6 +125,7 @@ Polymer({
_computeRunEnvironment(
data: podviewer.proto.PodViewerInputData|undefined|null):
podviewer.proto.RunEnvironment {
qiuminxu marked this conversation as resolved.
Show resolved Hide resolved
if (!data) return;
return data.runEnvironment;
},
_computeMaxStepId(podStatsMaps: Array<podviewer.proto.PodStatsMap>): number {
Expand Down Expand Up @@ -162,6 +159,10 @@ Polymer({
if (j == 1) {
continue;
}
// If the input is missing a field, set it to 0.
if (!val[layers[j].key]) {
val[layers[j].key] = 0;
}
// Skip the lowFlopsComputeUs.
val['lowFlopsComputeUs'] -= val[layers[j].key];
}
Expand Down Expand Up @@ -204,14 +205,6 @@ Polymer({
if (!newData) return;
this.curStepId = 0;
},
/**
* Updates the input of the details card when the selected channel changes.
*/
_selectedChannelChanged(newChannel: Array<podviewer.proto.ChannelInfo>) {
if (newChannel) {
this.activeDetails = newChannel;
}
},
/**
* The active bar could be one of the PodStatsRecord, ChannelInfo or
* AllReduceOpInfo. Reuse the details_card component to show any of these
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ const LEGEND_TEXT_HEIGHT = 9.5;
const LEGEND_TEXT_SIZE = '0.32em';

const FONT_SIZE = 14;
const TRANSITION_DURATION = 1000;

Polymer({
is: 'stack-bar-chart',
Expand Down Expand Up @@ -57,9 +58,6 @@ Polymer({
if (!data.length || !this.isAttached || this.stackLayers.length == 0) {
return;
}
d3.select(this.$.chart).selectAll('g > *').remove();
d3.select(this.$.chart).select('svg').remove();
d3.select(this.$.chart).select('.svg-container').remove();
const stackKey = this.stackLayers.map((d) => d.key);
const stackLabel = this.stackLayers.map((d) => d.label);
const height = SVG_HEIGHT - SVG_MARGIN.top - SVG_MARGIN.bottom;
Expand All @@ -68,13 +66,26 @@ Polymer({
let yScale = d3.scaleLinear().range([height, 0]);
let colorScale = d3.scaleOrdinal<number, string>(d3.schemeCategory10)
.domain([0, 19]);
let svg = d3.select(this.$.chart).append('svg')
.attr('width', Math.max(SVG_MIN_WIDTH,
xScaleRange + SVG_MARGIN.left + SVG_MARGIN.right))
.attr('height', SVG_HEIGHT)
.append('g')
.attr('transform',
'translate(' + SVG_MARGIN.left + ',' + SVG_MARGIN.top + ')');
let svg = d3.select(this.$.chart).select('svg');
if (svg.empty()) {
svg = d3.select(this.$.chart).append('svg')
.attr('width', Math.max(SVG_MIN_WIDTH,
xScaleRange + SVG_MARGIN.left + SVG_MARGIN.right))
.attr('height', SVG_HEIGHT)
.append('g')
.attr('transform',
'translate(' + SVG_MARGIN.left + ',' + SVG_MARGIN.top + ')');
// Draw x-axis.
svg.append('g')
.attr('class', 'x axis')
.style('font-size', FONT_SIZE)
.attr('transform', 'translate(0,' + (height + 5) + ')');
// Draw y-axis.
svg.append('g')
.attr('class', 'y axis')
.style('font-size', FONT_SIZE)
.attr('transform', 'translate(0,0)');
}
let stack = d3.stack().keys(stackKey).order(d3.stackOrderNone)
.offset(d3.stackOffsetNone);
const layers = stack(data);
Expand All @@ -83,20 +94,28 @@ Polymer({
.nice();
this.drawLayers(svg, layers, xScale, yScale, colorScale);
this.drawAxes(svg, xScale, yScale, height);
this.drawLegend(svg, stackLabel, colorScale);
let legend = d3.select(this.$.chart).select('.legend');
if (legend.empty()) {
legend = svg.append('g')
.attr('class', 'legend')
.attr('font-family', 'sans-serif')
.attr('font-size', FONT_SIZE)
.attr('text-anchor', 'start')
}
this.drawLegend(legend, stackLabel, colorScale);
},
/**
* Draw the layers for all the bars.
*/
drawLayers: function(svg: any, layers: any, xScale: any, yScale: any,
colorScale: any) {
let parent = this;
// Update layer for each metric across all cores, and rect for each core.
let layer = svg.selectAll('.layer').data(layers);
layer.enter().append('g').merge(layer)
.attr('class', 'layer')
let rects = layer.enter().append('g').attr('class', 'layer').merge(layer)
.style('fill', (d, i) => colorScale(i))
.selectAll('rect').data((d) => d)
.enter().append('rect')
.selectAll('rect').data((d) => d);
rects.enter().append('rect').merge(rects)
.attr('width', xScale.bandwidth())
.attr('y', (d) => yScale(d[1]))
.attr('height', (d) => yScale(d[0]) - yScale(d[1]))
Expand All @@ -110,53 +129,50 @@ Polymer({
function(d) {
d3.select(this).style('opacity', 1.0);
parent.activeBar = null;
});
})
.transition()
.duration(TRANSITION_DURATION);
layer.exit().remove();
},
/**
* Draw the axes of the chart.
*/
drawAxes: function(svg: any, xScale: any, yScale: any, height: number) {
svg.append('g')
.attr('class', 'axis axis--x')
.style('font-size', FONT_SIZE)
.attr('transform', 'translate(0,' + (height + 5) + ')')
svg.select('.x.axis')
.transition()
.duration(TRANSITION_DURATION)
.call(d3.axisBottom(xScale));
svg.append('g')
.attr('class', 'axis axis--y')
.style('font-size', FONT_SIZE)
.attr('transform', 'translate(0,0)')
svg.select('.y.axis')
.transition()
.duration(TRANSITION_DURATION)
.call(d3.axisLeft(yScale));
},
/**
* Draw the legends of the chart.
*/
drawLegend: function(svg: any, labels: Array<string>, colorScale: any) {
let legend = svg.append('g')
.attr('font-family', 'sans-serif')
.attr('font-size', FONT_SIZE)
.attr('text-anchor', 'start')
.selectAll('g')
.data(labels.slice());
let legendG = legend.enter().append('g').merge(legend)
.attr('transform',
(d, i) => 'translate(' +
(i * LEGEND_WIDTH -
Math.floor(i / LABELS_PER_LANE) * LEGEND_WIDTH *
LABELS_PER_LANE) + ',' +
Math.floor(i / LABELS_PER_LANE) *
LEGEND_HEIGHT + ')'
);
drawLegend: function(selection: any, labels: Array<string>, colorScale: any) {
let legend = selection.selectAll('g').data(labels.slice());
legend.exit().remove();

legendG.append('rect')
let legendEnter = legend.enter().append('g');
qiuminxu marked this conversation as resolved.
Show resolved Hide resolved
legendEnter.append('rect')
.attr('x', YAXIS_TO_LEGEND)
.attr('width', ICON_SIZE)
.attr('height', ICON_SIZE)
.attr('fill', (d, i) => colorScale(i));
legendG.append('text')
.attr('height', ICON_SIZE);
legendEnter.append('text')
.attr('x', YAXIS_TO_LEGEND + LEGEND_MARGIN + ICON_SIZE)
.attr('y', LEGEND_TEXT_HEIGHT)
.attr('dy', LEGEND_TEXT_SIZE)
.text((d) => d);

legend = legendEnter.merge(legend);
legend.attr('transform', (d, i) => {
const x = i * LEGEND_WIDTH - Math.floor(i / LABELS_PER_LANE) *
LEGEND_WIDTH * LABELS_PER_LANE;
const y = Math.floor(i / LABELS_PER_LANE) * LEGEND_HEIGHT;
return `translate(${x}, ${y})`;
});
legend.select('rect').attr('fill', (d, i) => colorScale(i));
legend.select('text').text((d) => d);
},
/**
* Redraw the stack bar chart.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,5 @@ tf_web_library(
"@org_polymer_paper_listbox",
"@org_polymer_paper_menu",
"@org_polymer_paper_menu_button",
"@org_polymer_paper_slider",
],
)
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
<link rel="import" href="../paper-icon-button/paper-icon-button.html">
<link rel="import" href="../paper-item/paper-item.html">
<link rel="import" href="../paper-listbox/paper-listbox.html">
<link rel="import" href="../paper-slider/paper-slider.html">
<link rel="import" href="pod-viewer-common.html">

<dom-module id='topology-graph'>
Expand Down Expand Up @@ -170,13 +169,6 @@
</template>
</paper-listbox>
</paper-menu-button>
<div hidden="[[!_maxChannelId]]">
<span class="control-row-left metrics-label">Please select a channel id
<paper-slider min=[[_minChannelId]] max=[[_maxChannelId]]
snaps step="1" value="{{selectedChannelId}}" editable>
</paper-slider>
</span>
</div>
</div>
<div id="tpgraph"></div>
<div id="tooltip" class="hidden">
Expand Down
Loading