-
Notifications
You must be signed in to change notification settings - Fork 29k
SPARK-2450 Adds executor log links to Web UI #3486
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -362,6 +362,7 @@ private[spark] class Worker( | |
| self, | ||
| workerId, | ||
| host, | ||
| webUiPort, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think that we can fix this for ephemeral ports by using
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yep, this fixes it.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Argh, I shouldn't have let this get committed without making sure this comment had been addressed. This is still broken; I'm fixing it as part of my own follow-up PR, which adds more tests.
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oh no. I thought I addressed all the comments, including this one. Do you want me to fix it?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm working on a PR now that will fix this and an issue where this doesn't obey SPARK_PUBLIC_DNS (the latter is a bit trickier to test, unfortunately, since it needs some tricky SparkConf + Mockito usage to mock environment variables). |
||
| sparkHome, | ||
| executorDir, | ||
| akkaUrl, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,7 +26,8 @@ import org.apache.spark.ui.{ToolTips, UIUtils, WebUIPage} | |
| import org.apache.spark.util.Utils | ||
|
|
||
| /** Summary information about an executor to display in the UI. */ | ||
| private case class ExecutorSummaryInfo( | ||
| // Needs to be private[ui] because of a false positive MiMa failure. | ||
| private[ui] case class ExecutorSummaryInfo( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think this visibility change is necessary.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It turns out that this is somehow necessary for the MiMa checks to pass. We should add a comment to explain this. |
||
| id: String, | ||
| hostPort: String, | ||
| rddBlocks: Int, | ||
|
|
@@ -40,7 +41,8 @@ private case class ExecutorSummaryInfo( | |
| totalInputBytes: Long, | ||
| totalShuffleRead: Long, | ||
| totalShuffleWrite: Long, | ||
| maxMemory: Long) | ||
| maxMemory: Long, | ||
| executorLogs: Map[String, String]) | ||
|
|
||
| private[ui] class ExecutorsPage( | ||
| parent: ExecutorsTab, | ||
|
|
@@ -55,6 +57,7 @@ private[ui] class ExecutorsPage( | |
| val diskUsed = storageStatusList.map(_.diskUsed).sum | ||
| val execInfo = for (statusId <- 0 until storageStatusList.size) yield getExecInfo(statusId) | ||
| val execInfoSorted = execInfo.sortBy(_.id) | ||
| val logsExist = execInfo.filter(_.executorLogs.nonEmpty).nonEmpty | ||
|
|
||
| val execTable = | ||
| <table class={UIUtils.TABLE_CLASS_STRIPED}> | ||
|
|
@@ -79,10 +82,11 @@ private[ui] class ExecutorsPage( | |
| Shuffle Write | ||
| </span> | ||
| </th> | ||
| {if (logsExist) <th class="sorttable_nosort">Logs</th> else Seq.empty} | ||
| {if (threadDumpEnabled) <th class="sorttable_nosort">Thread Dump</th> else Seq.empty} | ||
| </thead> | ||
| <tbody> | ||
| {execInfoSorted.map(execRow)} | ||
| {execInfoSorted.map(execRow(_, logsExist))} | ||
| </tbody> | ||
| </table> | ||
|
|
||
|
|
@@ -107,7 +111,7 @@ private[ui] class ExecutorsPage( | |
| } | ||
|
|
||
| /** Render an HTML row representing an executor */ | ||
| private def execRow(info: ExecutorSummaryInfo): Seq[Node] = { | ||
| private def execRow(info: ExecutorSummaryInfo, logsExist: Boolean): Seq[Node] = { | ||
| val maximumMemory = info.maxMemory | ||
| val memoryUsed = info.memoryUsed | ||
| val diskUsed = info.diskUsed | ||
|
|
@@ -138,6 +142,21 @@ private[ui] class ExecutorsPage( | |
| <td sorttable_customkey={info.totalShuffleWrite.toString}> | ||
| {Utils.bytesToString(info.totalShuffleWrite)} | ||
| </td> | ||
| { | ||
| if (logsExist) { | ||
| <td> | ||
| { | ||
| info.executorLogs.map { case (logName, logUrl) => | ||
| <div> | ||
| <a href={logUrl}> | ||
| {logName} | ||
| </a> | ||
| </div> | ||
| } | ||
| } | ||
| </td> | ||
| } | ||
| } | ||
| { | ||
| if (threadDumpEnabled) { | ||
| val encodedId = URLEncoder.encode(info.id, "UTF-8") | ||
|
|
@@ -168,6 +187,7 @@ private[ui] class ExecutorsPage( | |
| val totalInputBytes = listener.executorToInputBytes.getOrElse(execId, 0L) | ||
| val totalShuffleRead = listener.executorToShuffleRead.getOrElse(execId, 0L) | ||
| val totalShuffleWrite = listener.executorToShuffleWrite.getOrElse(execId, 0L) | ||
| val executorLogs = listener.executorToLogUrls.getOrElse(execId, Map.empty) | ||
|
|
||
| new ExecutorSummaryInfo( | ||
| execId, | ||
|
|
@@ -183,7 +203,8 @@ private[ui] class ExecutorsPage( | |
| totalInputBytes, | ||
| totalShuffleRead, | ||
| totalShuffleWrite, | ||
| maxMem | ||
| maxMem, | ||
| executorLogs | ||
| ) | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -383,7 +383,8 @@ private[spark] object JsonProtocol { | |
|
|
||
| def executorInfoToJson(executorInfo: ExecutorInfo): JValue = { | ||
| ("Host" -> executorInfo.executorHost) ~ | ||
| ("Total Cores" -> executorInfo.totalCores) | ||
| ("Total Cores" -> executorInfo.totalCores) ~ | ||
| ("Log Urls" -> mapToJson(executorInfo.logUrlMap)) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should this use
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, I agree that we should have backwards-compatibility tests for this.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Actually, I was mistaken: we don't need backwards-compatibility tests because this ExecutorInfo class is new in 1.3. We should revert my commit to add the compatibility tests, since it just adds clutter now.
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, this is correct. The previous commit that was blocking this was my patch to add the ExecutorInfo. I totally blanked too, so my mistake. |
||
| } | ||
|
|
||
| /** ------------------------------ * | ||
|
|
@@ -792,7 +793,8 @@ private[spark] object JsonProtocol { | |
| def executorInfoFromJson(json: JValue): ExecutorInfo = { | ||
| val executorHost = (json \ "Host").extract[String] | ||
| val totalCores = (json \ "Total Cores").extract[Int] | ||
| new ExecutorInfo(executorHost, totalCores) | ||
| val logUrls = mapFromJson(json \ "Log Urls").toMap | ||
| new ExecutorInfo(executorHost, totalCores, logUrls) | ||
| } | ||
|
|
||
| /** -------------------------------- * | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,59 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.deploy | ||
|
|
||
| import scala.collection.mutable | ||
|
|
||
| import org.scalatest.{BeforeAndAfter, FunSuite} | ||
|
|
||
| import org.apache.spark.scheduler.cluster.ExecutorInfo | ||
| import org.apache.spark.scheduler.{SparkListenerExecutorAdded, SparkListener} | ||
| import org.apache.spark.{SparkContext, LocalSparkContext} | ||
|
|
||
| class LogUrlsStandaloneSuite extends FunSuite with LocalSparkContext with BeforeAndAfter { | ||
|
|
||
| /** Length of time to wait while draining listener events. */ | ||
| val WAIT_TIMEOUT_MILLIS = 10000 | ||
|
|
||
| before { | ||
| sc = new SparkContext("local-cluster[2,1,512]", "test") | ||
| } | ||
|
|
||
| test("verify log urls get propagated from workers") { | ||
| val listener = new SaveExecutorInfo | ||
| sc.addSparkListener(listener) | ||
|
|
||
| val rdd1 = sc.parallelize(1 to 100, 4) | ||
| val rdd2 = rdd1.map(_.toString) | ||
| rdd2.setName("Target RDD") | ||
| rdd2.count() | ||
|
|
||
| assert(sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)) | ||
| listener.addedExecutorInfos.values.foreach { info => | ||
| assert(info.logUrlMap.nonEmpty) | ||
| } | ||
| } | ||
|
|
||
| private class SaveExecutorInfo extends SparkListener { | ||
| val addedExecutorInfos = mutable.Map[String, ExecutorInfo]() | ||
|
|
||
| override def onExecutorAdded(executor: SparkListenerExecutorAdded) { | ||
| addedExecutorInfos(executor.executorId) = executor.executorInfo | ||
| } | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like `$host` here might be a machine's hostname, but we probably want this to reflect SPARK_PUBLIC_DNS (or whatever the new system property equivalent is) so that we generate an externally-accessible URL.