Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 138 additions & 25 deletions core/src/main/resources/org/apache/spark/ui/static/stagepage.js
Original file line number Diff line number Diff line change
Expand Up @@ -243,23 +243,39 @@ function createRowMetadataForColumn(colKey, data, checkboxId) {
}

/*
 * Re-syncs the "additional metrics" checkboxes with the columns that are
 * currently visible in the task table and in the executor summary table.
 *
 * For every visible optional task column the matching "#box-<n>" checkbox is
 * ticked and the corresponding row of taskSummaryMetricsTableArray is appended
 * to taskSummaryMetricsTableCurrentStateArray; the task summary metrics table
 * is then rebuilt from that state. For every visible optional executor column
 * the matching "#executor-box-<n>" checkbox is ticked.
 *
 * The "Select All" checkbox ("#box-0") is ticked only when at least one
 * optional column is visible and no inspected optional column (task or
 * executor) is hidden. It must not be decided from the task columns alone,
 * otherwise "Select All" would appear ticked while executor columns are hidden.
 *
 * Takes no arguments and returns nothing; it works purely through the
 * file-level table selectors / arrays and the DOM.
 */
function reselectCheckboxesBasedOnTaskTableState() {
  var taskSummaryHasSelected = false;
  var executorSummaryHasSelected = false;
  var allTaskSummaryChecked = true;
  var allExecutorSummaryChecked = true;
  var taskSummaryMetricsTableCurrentFilteredArray = taskSummaryMetricsTableCurrentStateArray.slice();

  // The task table may not have been created yet, hence the typeof guard.
  if (typeof taskTableSelector !== 'undefined' && taskSummaryMetricsTableCurrentStateArray.length > 0) {
    for (var k = 0; k < optionalColumns.length; k++) {
      if (taskTableSelector.column(optionalColumns[k]).visible()) {
        taskSummaryHasSelected = true;
        $("#box-"+optionalColumns[k]).prop('checked', true);
        // checkboxId is compared as a string against the numeric column index,
        // so the loose comparison is intentional.
        taskSummaryMetricsTableCurrentStateArray.push(taskSummaryMetricsTableArray.filter(row => (row.checkboxId).toString() == optionalColumns[k])[0]);
        taskSummaryMetricsTableCurrentFilteredArray = taskSummaryMetricsTableCurrentStateArray.slice();
      } else {
        allTaskSummaryChecked = false;
      }
    }
    createDataTableForTaskSummaryMetricsTable(taskSummaryMetricsTableCurrentFilteredArray);
  }

  // Same guard for the executor summary table.
  if (typeof executorSummaryTableSelector !== 'undefined') {
    for (var j = 0; j < executorOptionalColumns.length; j++) {
      if (executorSummaryTableSelector.column(executorOptionalColumns[j]).visible()) {
        executorSummaryHasSelected = true;
        $("#executor-box-"+executorOptionalColumns[j]).prop('checked', true);
      } else {
        allExecutorSummaryChecked = false;
      }
    }
  }

  // Single place where "Select All" is decided, taking both tables into account.
  if ((taskSummaryHasSelected || executorSummaryHasSelected) && allTaskSummaryChecked && allExecutorSummaryChecked) {
    $("#box-0").prop('checked', true);
  }
}

function getStageAttemptId() {
Expand All @@ -278,6 +294,9 @@ var taskSummaryMetricsDataTable;
// Indexes of the optional task-table columns. Each entry is passed to
// taskTableSelector.column(...) and matches a "box-<n>" checkbox id /
// data-column value in the additional-metrics panel.
var optionalColumns = [11, 12, 13, 14, 15, 16, 17, 21];
// Handle used to query/toggle task-table columns; checked against 'undefined'
// before use, so it is presumably assigned once the task table is created
// (assignment not visible here -- confirm).
var taskTableSelector;

// Indexes of the optional executor-summary columns. Each entry is passed to
// executorSummaryTableSelector.column(...) and matches an "executor-box-<n>"
// checkbox id / data-column value.
var executorOptionalColumns = [15, 16, 17, 18];
// Handle for the "#summary-executor-table" DataTable; stays undefined until
// that table has been created.
var executorSummaryTableSelector;

$(document).ready(function () {
setDataTableDefaults();

Expand All @@ -288,14 +307,18 @@ $(document).ready(function () {
"</a></div>" +
"<div class='container-fluid-div ml-4 d-none' id='toggle-metrics'>" +
"<div id='select_all' class='select-all-checkbox-div'><input type='checkbox' class='toggle-vis' id='box-0' data-column='0'> Select All</div>" +
"<div id='scheduler_delay' class='scheduler-delay-checkbox-div'><input type='checkbox' class='toggle-vis' id='box-11' data-column='11'> Scheduler Delay</div>" +
"<div id='task_deserialization_time' class='task-deserialization-time-checkbox-div'><input type='checkbox' class='toggle-vis' id='box-12' data-column='12'> Task Deserialization Time</div>" +
"<div id='shuffle_read_blocked_time' class='shuffle-read-blocked-time-checkbox-div'><input type='checkbox' class='toggle-vis' id='box-13' data-column='13'> Shuffle Read Blocked Time</div>" +
"<div id='shuffle_remote_reads' class='shuffle-remote-reads-checkbox-div'><input type='checkbox' class='toggle-vis' id='box-14' data-column='14'> Shuffle Remote Reads</div>" +
"<div id='shuffle_write_time' class='shuffle-write-time-checkbox-div'><input type='checkbox' class='toggle-vis' id='box-21' data-column='21'> Shuffle Write Time</div>" +
"<div id='result_serialization_time' class='result-serialization-time-checkbox-div'><input type='checkbox' class='toggle-vis' id='box-15' data-column='15'> Result Serialization Time</div>" +
"<div id='getting_result_time' class='getting-result-time-checkbox-div'><input type='checkbox' class='toggle-vis' id='box-16' data-column='16'> Getting Result Time</div>" +
"<div id='peak_execution_memory' class='peak-execution-memory-checkbox-div'><input type='checkbox' class='toggle-vis' id='box-17' data-column='17'> Peak Execution Memory</div>" +
"<div id='scheduler_delay' class='scheduler-delay-checkbox-div'><input type='checkbox' class='toggle-vis' id='box-11' data-column='11' data-metrics-type='task'> Scheduler Delay</div>" +
"<div id='task_deserialization_time' class='task-deserialization-time-checkbox-div'><input type='checkbox' class='toggle-vis' id='box-12' data-column='12' data-metrics-type='task'> Task Deserialization Time</div>" +
"<div id='shuffle_read_blocked_time' class='shuffle-read-blocked-time-checkbox-div'><input type='checkbox' class='toggle-vis' id='box-13' data-column='13' data-metrics-type='task'> Shuffle Read Blocked Time</div>" +
"<div id='shuffle_remote_reads' class='shuffle-remote-reads-checkbox-div'><input type='checkbox' class='toggle-vis' id='box-14' data-column='14' data-metrics-type='task'> Shuffle Remote Reads</div>" +
"<div id='shuffle_write_time' class='shuffle-write-time-checkbox-div'><input type='checkbox' class='toggle-vis' id='box-21' data-column='21' data-metrics-type='task'> Shuffle Write Time</div>" +
"<div id='result_serialization_time' class='result-serialization-time-checkbox-div'><input type='checkbox' class='toggle-vis' id='box-15' data-column='15' data-metrics-type='task'> Result Serialization Time</div>" +
"<div id='getting_result_time' class='getting-result-time-checkbox-div'><input type='checkbox' class='toggle-vis' id='box-16' data-column='16' data-metrics-type='task'> Getting Result Time</div>" +
"<div id='peak_execution_memory' class='peak-execution-memory-checkbox-div'><input type='checkbox' class='toggle-vis' id='box-17' data-column='17' data-metrics-type='task'> Peak Execution Memory</div>" +
"<div id='executor_jvm_on_off_heap_memory' class='executor-jvm-metrics-checkbox-div'><input type='checkbox' class='toggle-vis' id='executor-box-15' data-column='15' data-metrics-type='executor'> Peak JVM Memory OnHeap / OffHeap</div>" +
"<div id='executor_on_off_heap_execution_memory' class='executor-jvm-metrics-checkbox-div'><input type='checkbox' class='toggle-vis' id='executor-box-16' data-column='16' data-metrics-type='executor'> Peak Execution Memory OnHeap / OffHeap</div>" +
"<div id='executor_on_off_heap_storage_memory' class='executor-jvm-metrics-checkbox-div'><input type='checkbox' class='toggle-vis' id='executor-box-17' data-column='17' data-metrics-type='executor'> Peak Storage Memory OnHeap / OffHeap</div>" +
"<div id='executor_direct_mapped_pool_memory' class='executor-jvm-metrics-checkbox-div'><input type='checkbox' class='toggle-vis' id='executor-box-18' data-column='18' data-metrics-type='executor'> Peak Pool Memory Direct / Mapped</div>" +
"</div>");

$('#scheduler_delay').attr("data-toggle", "tooltip")
Expand Down Expand Up @@ -463,15 +486,95 @@ $(document).ready(function () {
data : function (row, type) {
return typeof row.diskBytesSpilled != 'undefined' ? formatBytes(row.diskBytesSpilled, type) : "";
}
},
// The four column definitions below render row.peakMemoryMetrics as
// "first / second" pairs. For the 'display' type both values are formatted with
// formatBytes and joined with ' / '; for every other type requested by the
// table only the first value of the pair is returned as a raw number, so the
// second value never takes part in non-display processing. When
// peakMemoryMetrics is missing, 0 (non-display) or '0.0 B / 0.0 B' (display) is
// used. NOTE(review): these are presumably columns 15-18, the ones hidden by
// default through columnDefs -- confirm against the full column list.
{
// Peak JVM memory: JVMHeapMemory / JVMOffHeapMemory.
data : function (row, type) {
var peakMemoryMetrics = row.peakMemoryMetrics;
if (typeof peakMemoryMetrics !== 'undefined') {
if (type !== 'display')
return peakMemoryMetrics.JVMHeapMemory;
else
return (formatBytes(peakMemoryMetrics.JVMHeapMemory, type) + ' / ' +
formatBytes(peakMemoryMetrics.JVMOffHeapMemory, type));
} else {
// No peak metrics reported for this row: fall back to zero values.
if (type !== 'display') {
return 0;
} else {
return '0.0 B / 0.0 B';
}
}

}
},
{
// Peak execution memory: OnHeapExecutionMemory / OffHeapExecutionMemory.
data : function (row, type) {
var peakMemoryMetrics = row.peakMemoryMetrics
if (typeof peakMemoryMetrics !== 'undefined') {
if (type !== 'display')
return peakMemoryMetrics.OnHeapExecutionMemory;
else
return (formatBytes(peakMemoryMetrics.OnHeapExecutionMemory, type) + ' / ' +
formatBytes(peakMemoryMetrics.OffHeapExecutionMemory, type));
} else {
// No peak metrics reported for this row: fall back to zero values.
if (type !== 'display') {
return 0;
} else {
return '0.0 B / 0.0 B';
}
}
}
},
{
// Peak storage memory: OnHeapStorageMemory / OffHeapStorageMemory.
data : function (row, type) {
var peakMemoryMetrics = row.peakMemoryMetrics
if (typeof peakMemoryMetrics !== 'undefined') {
if (type !== 'display')
return peakMemoryMetrics.OnHeapStorageMemory;
else
return (formatBytes(peakMemoryMetrics.OnHeapStorageMemory, type) + ' / ' +
formatBytes(peakMemoryMetrics.OffHeapStorageMemory, type));
} else {
// No peak metrics reported for this row: fall back to zero values.
if (type !== 'display') {
return 0;
} else {
return '0.0 B / 0.0 B';
}
}
}
},
{
// Peak pool memory: DirectPoolMemory / MappedPoolMemory.
data : function (row, type) {
var peakMemoryMetrics = row.peakMemoryMetrics
if (typeof peakMemoryMetrics !== 'undefined') {
if (type !== 'display')
return peakMemoryMetrics.DirectPoolMemory;
else
return (formatBytes(peakMemoryMetrics.DirectPoolMemory, type) + ' / ' +
formatBytes(peakMemoryMetrics.MappedPoolMemory, type));
} else {
// No peak metrics reported for this row: fall back to zero values.
if (type !== 'display') {
return 0;
} else {
return '0.0 B / 0.0 B';
}
}
}
}
],
"columnDefs": [
{ "visible": false, "targets": 15 },
{ "visible": false, "targets": 16 },
{ "visible": false, "targets": 17 },
{ "visible": false, "targets": 18 }
],
"deferRender": true,
"order": [[0, "asc"]],
"bAutoWidth": false,
"oLanguage": {
"sEmptyTable": "No data to show yet"
}
};
// Assign to the already-declared outer executorSummaryTableSelector on purpose
// (no `var` here): re-declaring it inside this function would create a local
// binding that shadows the outer one, leaving the outer variable undefined for
// code that reads it later (e.g. reselectCheckboxesBasedOnTaskTableState and
// the checkbox toggle handler).
executorSummaryTableSelector =
  $("#summary-executor-table").DataTable(executorSummaryConf);
$('#parent-container [data-toggle="tooltip"]').tooltip();

Expand Down Expand Up @@ -923,30 +1026,40 @@ $(document).ready(function () {
// Column index carried by the clicked checkbox; "0" is the "Select All" box.
var para = $(this).attr('data-column');
if (para == "0") {
  // "Select All": show or hide every optional column of both tables at once.
  var allColumns = taskTableSelector.columns(optionalColumns);
  var executorAllColumns = executorSummaryTableSelector.columns(executorOptionalColumns);
  if ($(this).is(":checked")) {
    $(".toggle-vis").prop('checked', true);
    allColumns.visible(true);
    executorAllColumns.visible(true);
    createDataTableForTaskSummaryMetricsTable(taskSummaryMetricsTableArray);
  } else {
    $(".toggle-vis").prop('checked', false);
    allColumns.visible(false);
    executorAllColumns.visible(false);
    // Keep only the summary rows whose checkboxId is below the first optional column (11).
    var taskSummaryMetricsTableFilteredArray =
      taskSummaryMetricsTableArray.filter(row => row.checkboxId < 11);
    createDataTableForTaskSummaryMetricsTable(taskSummaryMetricsTableFilteredArray);
  }
} else {
  // A single metric checkbox: data-metrics-type says which table the column
  // index refers to, because task and executor columns share index values.
  // Each column is toggled exactly once; toggling it a second time in the same
  // click would cancel the change.
  var dataMetricsType = $(this).attr("data-metrics-type");
  if (dataMetricsType === 'task') {
    var column = taskTableSelector.column(para);
    // Toggle the visibility
    column.visible(!column.visible());
    var taskSummaryMetricsTableFilteredArray = [];
    if ($(this).is(":checked")) {
      // Remember the newly selected metric row (pushed once per click).
      taskSummaryMetricsTableCurrentStateArray.push(taskSummaryMetricsTableArray.filter(row => (row.checkboxId).toString() == para)[0]);
      taskSummaryMetricsTableFilteredArray = taskSummaryMetricsTableCurrentStateArray.slice();
    } else {
      taskSummaryMetricsTableFilteredArray =
        taskSummaryMetricsTableCurrentStateArray.filter(row => (row.checkboxId).toString() != para);
    }
    // Only task metrics feed the summary metrics table, so it is rebuilt here only.
    createDataTableForTaskSummaryMetricsTable(taskSummaryMetricsTableFilteredArray);
  }
  if (dataMetricsType === "executor") {
    var column = executorSummaryTableSelector.column(para);
    column.visible(!column.visible());
  }
}
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ <h4 class="title-table">Aggregated Metrics by Executor</h4>
<th><span id="executor-summary-shuffle-write">Shuffle Write Size / Records</span></th>
<th>Spill (Memory) </th>
<th>Spill (Disk) </th>
<th>Peak JVM Memory OnHeap / OffHeap</th>
<th>Peak Execution Memory OnHeap / OffHeap</th>
<th>Peak Storage Memory OnHeap / OffHeap</th>
<th>Peak Pool Memory Direct / Mapped</th>
</tr>
</thead>
<tbody>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -687,6 +687,9 @@ private[spark] class AppStatusListener(
stage.killedSummary = killedTasksSummary(event.reason, stage.killedSummary)
}
stage.activeTasksPerExecutor(event.taskInfo.executorId) -= 1

stage.executorSummary(event.taskInfo.executorId).peakExecutorMetrics
.compareAndUpdatePeakValues(event.taskExecutorMetrics)
Copy link
Member

@sarutak sarutak Dec 8, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm... executor metrics for each stage should be collected here.
But if the heartbeat interval of an executor is longer than the lifetime of a stage, we can't collect the executor metrics for that stage.
So this change can be one option. What do you think @gengliangwang ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We collect all of the data in order to choose the peak metrics. IMO, more accuracy is better.
However, I'm not sure about the performance impact of this part, so further suggestions are welcome.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@imback82 Do you have any concern about this change?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems fine to me if we need more accurate peak values.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems that the first metrics of peakExecutorMetrics become 0 instead of -1 after this. @AngersZhuuuu Do you know the reason?

Copy link
Contributor Author

@AngersZhuuuu AngersZhuuuu Dec 14, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems that the first metrics of peakExecutorMetrics become 0 instead of -1 after this. @AngersZhuuuu Do you know the reason?

-1 is the default value in peakExecutorMetrics. With this change, we update that value with the real task metrics; whether they are all 0 depends on the metrics data...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems that the first metrics of peakExecutorMetrics become 0 instead of -1 after this. @AngersZhuuuu Do you know the reason?

@gengliangwang
With this change, peakExecutorMetrics is updated not only in onExecutorMetricsUpdate but also in onTaskEnd.
So, when a peak value carried by SparkListenerTaskEnd is 0, the corresponding peak value in peakExecutorMetrics is set to 0.
Do you have any concern?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sarutak No, I am ok with it :)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

O.K, I'll merge later if there are no objections. Thanks for the response. @gengliangwang

// [SPARK-24415] Wait for all tasks to finish before removing stage from live list
val removeStage =
stage.activeTasks == 0 &&
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,28 @@
"memoryBytesSpilled" : 0,
"diskBytesSpilled" : 0,
"isBlacklistedForStage" : true,
"peakMemoryMetrics" : {
"JVMHeapMemory" : 0,
"JVMOffHeapMemory" : 0,
"OnHeapExecutionMemory" : 0,
"OffHeapExecutionMemory" : 0,
"OnHeapStorageMemory" : 0,
"OffHeapStorageMemory" : 0,
"OnHeapUnifiedMemory" : 0,
"OffHeapUnifiedMemory" : 0,
"DirectPoolMemory" : 0,
"MappedPoolMemory" : 0,
"ProcessTreeJVMVMemory" : 0,
"ProcessTreeJVMRSSMemory" : 0,
"ProcessTreePythonVMemory" : 0,
"ProcessTreePythonRSSMemory" : 0,
"ProcessTreeOtherVMemory" : 0,
"ProcessTreeOtherRSSMemory" : 0,
"MinorGCCount" : 0,
"MinorGCTime" : 0,
"MajorGCCount" : 0,
"MajorGCTime" : 0
},
"isExcludedForStage" : true
},
"1" : {
Expand All @@ -716,6 +738,28 @@
"memoryBytesSpilled" : 0,
"diskBytesSpilled" : 0,
"isBlacklistedForStage" : false,
"peakMemoryMetrics" : {
"JVMHeapMemory" : 0,
"JVMOffHeapMemory" : 0,
"OnHeapExecutionMemory" : 0,
"OffHeapExecutionMemory" : 0,
"OnHeapStorageMemory" : 0,
"OffHeapStorageMemory" : 0,
"OnHeapUnifiedMemory" : 0,
"OffHeapUnifiedMemory" : 0,
"DirectPoolMemory" : 0,
"MappedPoolMemory" : 0,
"ProcessTreeJVMVMemory" : 0,
"ProcessTreeJVMRSSMemory" : 0,
"ProcessTreePythonVMemory" : 0,
"ProcessTreePythonRSSMemory" : 0,
"ProcessTreeOtherVMemory" : 0,
"ProcessTreeOtherRSSMemory" : 0,
"MinorGCCount" : 0,
"MinorGCTime" : 0,
"MajorGCCount" : 0,
"MajorGCTime" : 0
},
"isExcludedForStage" : false
}
},
Expand Down
Loading