diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 501e865c4105..b35768222437 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -570,7 +570,9 @@ class SparkContext(config: SparkConf) extends Logging { _applicationAttemptId = _taskScheduler.applicationAttemptId() _conf.set("spark.app.id", _applicationId) if (_conf.get(UI_REVERSE_PROXY)) { - System.setProperty("spark.ui.proxyBase", "/proxy/" + _applicationId) + val proxyUrl = _conf.get(UI_REVERSE_PROXY_URL.key, "").stripSuffix("/") + + "/proxy/" + _applicationId + System.setProperty("spark.ui.proxyBase", proxyUrl) } _ui.foreach(_.setAppId(_applicationId)) _env.blockManager.initialize(_applicationId) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index ceeb01149f5d..a582a5d04585 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -147,7 +147,13 @@ private[deploy] class Master( webUi.bind() masterWebUiUrl = s"${webUi.scheme}$masterPublicAddress:${webUi.boundPort}" if (reverseProxy) { - masterWebUiUrl = conf.get(UI_REVERSE_PROXY_URL).orElse(Some(masterWebUiUrl)).get + val uiReverseProxyUrl = conf.get(UI_REVERSE_PROXY_URL).map(_.stripSuffix("/")) + if (uiReverseProxyUrl.nonEmpty) { + System.setProperty("spark.ui.proxyBase", uiReverseProxyUrl.get) + // If the master URL has a path component, it must end with a slash. + // Otherwise the browser generates incorrect relative links + masterWebUiUrl = uiReverseProxyUrl.get + "/" + } webUi.addProxy() logInfo(s"Spark Master is acting as a reverse proxy. Master, Workers and " + s"Applications UIs are available at $masterWebUiUrl") diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala index e4fcae13a2f8..2e26ccf671d8 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala @@ -171,7 +171,8 @@ private[deploy] class ExecutorRunner( // Add webUI log urls val baseUrl = if (conf.get(UI_REVERSE_PROXY)) { - s"/proxy/$workerId/logPage/?appId=$appId&executorId=$execId&logType=" + conf.get(UI_REVERSE_PROXY_URL.key, "").stripSuffix("/") + + s"/proxy/$workerId/logPage/?appId=$appId&executorId=$execId&logType=" } else { s"$webUiScheme$publicAddress:$webUiPort/logPage/?appId=$appId&executorId=$execId&logType=" } diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala index 0660dbdafd60..a6092f637a9c 100755 --- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala @@ -276,7 +276,14 @@ private[deploy] class Worker( master = Some(masterRef) connected = true if (reverseProxy) { - logInfo(s"WorkerWebUI is available at $activeMasterWebUiUrl/proxy/$workerId") + logInfo("WorkerWebUI is available at %s/proxy/%s".format( + activeMasterWebUiUrl.stripSuffix("/"), workerId)) + // if reverseProxyUrl is not set, then we continue to generate relative URLs + // starting with "/" throughout the UI and do not use activeMasterWebUiUrl + val proxyUrl = conf.get(UI_REVERSE_PROXY_URL.key, "").stripSuffix("/") + // In the method `UIUtils.makeHref`, the URL segment "/proxy/$worker_id" will be appended + // after `proxyUrl`, so no need to set the worker ID in the `spark.ui.proxyBase` here. + System.setProperty("spark.ui.proxyBase", proxyUrl) } // Cancel any outstanding re-registration attempts because we found a new master cancelLastRegistrationRetry() diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala index dba6f8e8440c..5e3406037a72 100644 --- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala @@ -639,7 +639,8 @@ private[spark] object UIUtils extends Logging { */ def makeHref(proxy: Boolean, id: String, origHref: String): String = { if (proxy) { - s"/proxy/$id" + val proxyPrefix = sys.props.getOrElse("spark.ui.proxyBase", "") + proxyPrefix + "/proxy/" + id } else { origHref } diff --git a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala index 3329300b64d1..a46799df069d 100644 --- a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala @@ -143,6 +143,10 @@ class MockExecutorLaunchFailWorker(master: Master, conf: SparkConf = new SparkCo class MasterSuite extends SparkFunSuite with Matchers with Eventually with PrivateMethodTester with BeforeAndAfter { + // regex to extract worker links from the master webui HTML + // groups represent URL and worker ID + val WORKER_LINK_RE = """\s*(worker-.+?)\s*""".r + private var _master: Master = _ after { @@ -320,10 +324,10 @@ class MasterSuite extends SparkFunSuite val conf = new SparkConf() val localCluster = new LocalSparkCluster(2, 2, 512, conf) localCluster.start() + val masterUrl = s"http://localhost:${localCluster.masterWebUIPort}" try { eventually(timeout(5.seconds), interval(100.milliseconds)) { - val json = Source.fromURL(s"http://localhost:${localCluster.masterWebUIPort}/json") - .getLines().mkString("\n") + val json = Source.fromURL(s"$masterUrl/json").getLines().mkString("\n") val JArray(workers) = (parse(json) \ "workers") workers.size should be (2) workers.foreach { workerSummaryJson => @@ -332,6 +336,16 @@ class MasterSuite extends SparkFunSuite .getLines().mkString("\n")) (workerResponse \ "cores").extract[Int] should be (2) } + + val html = Source.fromURL(s"$masterUrl/").getLines().mkString("\n") + html should include ("Spark Master at spark://") + val workerLinks = (WORKER_LINK_RE findAllMatchIn html).toList + workerLinks.size should be (2) + workerLinks foreach { case WORKER_LINK_RE(workerUrl, workerId) => + val workerHtml = Source.fromURL(workerUrl).getLines().mkString("\n") + workerHtml should include ("Spark Worker at") + workerHtml should include ("Running Executors (0)") + } } } finally { localCluster.stop() @@ -340,31 +354,106 @@ class MasterSuite extends SparkFunSuite test("master/worker web ui available with reverseProxy") { implicit val formats = org.json4s.DefaultFormats - val reverseProxyUrl = "http://localhost:8080" + val conf = new SparkConf() + conf.set(UI_REVERSE_PROXY, true) + val localCluster = new LocalSparkCluster(2, 2, 512, conf) + localCluster.start() + val masterUrl = s"http://localhost:${localCluster.masterWebUIPort}" + try { + eventually(timeout(5.seconds), interval(100.milliseconds)) { + val json = Source.fromURL(s"$masterUrl/json") + .getLines().mkString("\n") + val JArray(workers) = (parse(json) \ "workers") + workers.size should be (2) + workers.foreach { workerSummaryJson => + // the webuiaddress intentionally points to the local web ui. + // explicitly construct reverse proxy url targeting the master + val JString(workerId) = workerSummaryJson \ "id" + val url = s"$masterUrl/proxy/${workerId}/json" + val workerResponse = parse(Source.fromURL(url).getLines().mkString("\n")) + (workerResponse \ "cores").extract[Int] should be (2) + } + + val html = Source.fromURL(s"$masterUrl/").getLines().mkString("\n") + html should include ("Spark Master at spark://") + html should include ("""href="/static""") + html should include ("""src="/static""") + verifyWorkerUI(html, masterUrl) + } + } finally { + localCluster.stop() + System.getProperties().remove("spark.ui.proxyBase") + } + } + + test("master/worker web ui available behind front-end reverseProxy") { + implicit val formats = org.json4s.DefaultFormats + val reverseProxyUrl = "http://proxyhost:8080/path/to/spark" val conf = new SparkConf() conf.set(UI_REVERSE_PROXY, true) conf.set(UI_REVERSE_PROXY_URL, reverseProxyUrl) val localCluster = new LocalSparkCluster(2, 2, 512, conf) localCluster.start() + val masterUrl = s"http://localhost:${localCluster.masterWebUIPort}" try { eventually(timeout(5.seconds), interval(100.milliseconds)) { - val json = Source.fromURL(s"http://localhost:${localCluster.masterWebUIPort}/json") + val json = Source.fromURL(s"$masterUrl/json") .getLines().mkString("\n") val JArray(workers) = (parse(json) \ "workers") workers.size should be (2) workers.foreach { workerSummaryJson => + // the webuiaddress intentionally points to the local web ui. + // explicitly construct reverse proxy url targeting the master val JString(workerId) = workerSummaryJson \ "id" - val url = s"http://localhost:${localCluster.masterWebUIPort}/proxy/${workerId}/json" + val url = s"$masterUrl/proxy/${workerId}/json" val workerResponse = parse(Source.fromURL(url).getLines().mkString("\n")) (workerResponse \ "cores").extract[Int] should be (2) - (workerResponse \ "masterwebuiurl").extract[String] should be (reverseProxyUrl) + (workerResponse \ "masterwebuiurl").extract[String] should be (reverseProxyUrl + "/") } + + // with LocalCluster, we have masters and workers in the same JVM, each overwriting + // system property spark.ui.proxyBase. + // so we need to manage this property explicitly for test + System.getProperty("spark.ui.proxyBase") should startWith + (s"$reverseProxyUrl/proxy/worker-") + System.setProperty("spark.ui.proxyBase", reverseProxyUrl) + val html = Source.fromURL(s"$masterUrl/").getLines().mkString("\n") + html should include ("Spark Master at spark://") + verifyStaticResourcesServedByProxy(html, reverseProxyUrl) + verifyWorkerUI(html, masterUrl, reverseProxyUrl) } } finally { localCluster.stop() + System.getProperties().remove("spark.ui.proxyBase") + } + } + + private def verifyWorkerUI(masterHtml: String, masterUrl: String, + reverseProxyUrl: String = ""): Unit = { + val workerLinks = (WORKER_LINK_RE findAllMatchIn masterHtml).toList + workerLinks.size should be (2) + workerLinks foreach { + case WORKER_LINK_RE(workerUrl, workerId) => + workerUrl should be (s"$reverseProxyUrl/proxy/$workerId") + // there is no real front-end proxy as defined in $reverseProxyUrl + // construct url directly targeting the master + val url = s"$masterUrl/proxy/$workerId/" + System.setProperty("spark.ui.proxyBase", workerUrl) + val workerHtml = Source.fromURL(url).getLines().mkString("\n") + workerHtml should include ("Spark Worker at") + workerHtml should include ("Running Executors (0)") + verifyStaticResourcesServedByProxy(workerHtml, workerUrl) + case _ => fail // make sure we don't accidentially skip the tests } } + private def verifyStaticResourcesServedByProxy(html: String, proxyUrl: String): Unit = { + html should not include ("""href="/static""") + html should include (s"""href="$proxyUrl/static""") + html should not include ("""src="/static""") + html should include (s"""src="$proxyUrl/static""") + } + test("basic scheduling - spread out") { basicScheduling(spreadOut = true) } diff --git a/docs/configuration.md b/docs/configuration.md index d825a589dfd3..e4486cf12ca0 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1193,8 +1193,29 @@ Apart from these, the following properties are also available, and may be useful
spark.ui.reverseProxyUrlhttp://mydomain.com/path/to/spark/, allowing you to serve the
+ UI for multiple Spark clusters and other web applications through the same virtual host and
+ port.
+ Normally, this should be an absolute URL including scheme (http/https), host and port.
+ It is possible to specify a relative URL starting with "/" here. In this case, all URLs
+ generated by the Spark UI and Spark REST APIs will be server-relative links -- this will still
+ work, as the entire Spark UI is served through the same host and port.
+ http://mydomain.com/path/to/spark to
+ http://mydomain.com/path/to/spark/ (trailing slash after path prefix); otherwise
+ relative links on the master page do not work correctly.spark.ui.reverseProxy is turned on. This setting is not needed when the Spark
+ master web UI is directly reachable.